diff --git a/docs/how-to-debug-a-hyperlight-guest.md b/docs/how-to-debug-a-hyperlight-guest.md index 6707df518..0cf2e0753 100644 --- a/docs/how-to-debug-a-hyperlight-guest.md +++ b/docs/how-to-debug-a-hyperlight-guest.md @@ -1,12 +1,12 @@ -# How to debug a Hyperlight guest using gdb on Linux +# How to debug a Hyperlight guest using gdb or lldb -Hyperlight supports gdb debugging of a **KVM** or **MSHV** guest running inside a Hyperlight sandbox on Linux. +Hyperlight supports gdb debugging of a guest running inside a Hyperlight sandbox on Linux or Windows. When Hyperlight is compiled with the `gdb` feature enabled, a Hyperlight sandbox can be configured to start listening for a gdb connection. ## Supported features -The Hyperlight `gdb` feature enables **KVM** and **MSHV** guest debugging to: +The Hyperlight `gdb` feature enables guest debugging to: - stop at an entry point breakpoint which is automatically set by Hyperlight - add and remove HW breakpoints (maximum 4 set breakpoints at a time) - add and remove SW breakpoints @@ -19,7 +19,7 @@ The Hyperlight `gdb` feature enables **KVM** and **MSHV** guest debugging to: ## Expected behavior Below is a list describing some cases of expected behavior from a gdb debug -session of a guest binary running inside a Hyperlight sandbox on Linux. +session of a guest binary running inside a Hyperlight sandbox. 
- when the `gdb` feature is enabled and a SandboxConfiguration is provided a debug port, the created sandbox will wait for a gdb client to connect on the diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 700aa3727..3dfd9482b 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -21,6 +21,8 @@ bench = false # see https://bheisler.github.io/criterion.rs/book/faq.html#cargo- workspace = true [dependencies] +gdbstub = { version = "0.7.6", optional = true } +gdbstub_arch = { version = "0.3.2", optional = true } goblin = { version = "0.10" } rand = { version = "0.9" } cfg-if = { version = "1.0.1" } @@ -67,8 +69,6 @@ windows-version = "0.1" lazy_static = "1.4.0" [target.'cfg(unix)'.dependencies] -gdbstub = { version = "0.7.6", optional = true } -gdbstub_arch = { version = "0.3.2", optional = true } seccompiler = { version = "0.5.0", optional = true } kvm-bindings = { version = "0.13", features = ["fam-wrappers"], optional = true } kvm-ioctls = { version = "0.23", optional = true } diff --git a/src/hyperlight_host/build.rs b/src/hyperlight_host/build.rs index 75f9eba53..ca2fd8bf2 100644 --- a/src/hyperlight_host/build.rs +++ b/src/hyperlight_host/build.rs @@ -90,7 +90,7 @@ fn main() -> Result<()> { // Essentially the kvm and mshv features are ignored on windows as long as you use #[cfg(kvm)] and not #[cfg(feature = "kvm")]. // You should never use #[cfg(feature = "kvm")] or #[cfg(feature = "mshv")] in the codebase. cfg_aliases::cfg_aliases! 
{ - gdb: { all(feature = "gdb", debug_assertions, any(feature = "kvm", feature = "mshv2", feature = "mshv3"), target_os = "linux") }, + gdb: { all(feature = "gdb", debug_assertions) }, kvm: { all(feature = "kvm", target_os = "linux") }, mshv: { all(any(feature = "mshv2", feature = "mshv3"), target_os = "linux") }, crashdump: { all(feature = "crashdump") }, diff --git a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs index c39f7c06e..69dec3407 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs @@ -19,11 +19,21 @@ use gdbstub::conn::ConnectionExt; use gdbstub::stub::{ BaseStopReason, DisconnectReason, GdbStub, SingleThreadStopReason, run_blocking, }; -use libc::{SIGRTMIN, pthread_kill}; use super::x86_64_target::HyperlightSandboxTarget; use super::{DebugResponse, GdbTargetError, VcpuStopReason}; +// Signals are defined differently on Windows and Linux, so we use conditional compilation +#[cfg(target_os = "linux")] +mod signals { + pub use libc::{SIGINT, SIGSEGV}; +} +#[cfg(windows)] +mod signals { + pub const SIGINT: i8 = 2; + pub const SIGSEGV: i8 = 11; +} + struct GdbBlockingEventLoop; impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop { @@ -56,11 +66,11 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop { // to the target thread VcpuStopReason::Interrupt => BaseStopReason::SignalWithThread { tid: (), - signal: Signal(SIGRTMIN() as u8), + signal: Signal(signals::SIGINT as u8), }, VcpuStopReason::Crash => BaseStopReason::SignalWithThread { tid: (), - signal: Signal(11), + signal: Signal(signals::SIGSEGV as u8), }, VcpuStopReason::Unknown => { log::warn!("Unknown stop reason received"); @@ -105,11 +115,9 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop { log::info!("Received interrupt from GDB client - sending signal to target thread"); // Send a signal to the target thread to interrupt it - let 
ret = unsafe { pthread_kill(target.get_thread_id(), SIGRTMIN()) }; - - log::info!("pthread_kill returned {}", ret); + let res = target.interrupt_vcpu(); - if ret < 0 && ret != libc::ESRCH { + if !res { log::error!("Failed to send signal to target thread"); return Err(GdbTargetError::SendSignalError); } diff --git a/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs b/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs new file mode 100644 index 000000000..7a902d6de --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/gdb/hyperv_debug.rs @@ -0,0 +1,255 @@ +/* +Copyright 2024 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +use std::collections::HashMap; + +use windows::Win32::System::Hypervisor::WHV_VP_EXCEPTION_CONTEXT; + +use super::arch::{MAX_NO_OF_HW_BP, vcpu_stop_reason}; +use super::{GuestDebug, SW_BP_SIZE, VcpuStopReason, X86_64Regs}; +use crate::hypervisor::windows_hypervisor_platform::VMProcessor; +use crate::hypervisor::wrappers::{WHvDebugRegisters, WHvGeneralRegisters}; +use crate::{HyperlightError, Result, new_error}; + +/// Hyper-V Debug struct +/// This struct is used to abstract the internal details of the Hyper-V +/// guest debugging settings +#[derive(Default)] +pub(crate) struct HypervDebug { + /// vCPU stepping state + single_step: bool, + + /// Array of addresses for HW breakpoints + hw_breakpoints: Vec<u64>, + /// Saves the bytes modified to enable SW breakpoints + sw_breakpoints: HashMap<u64, [u8; SW_BP_SIZE]>, + + /// Debug registers + dbg_cfg: WHvDebugRegisters, +} + +impl HypervDebug { + pub(crate) fn new() -> Self { + Self { + single_step: false, + hw_breakpoints: vec![], + sw_breakpoints: HashMap::new(), + dbg_cfg: WHvDebugRegisters::default(), + } + } + + /// Returns the instruction pointer from the stopped vCPU + fn get_instruction_pointer(&self, vcpu_fd: &VMProcessor) -> Result<u64> { + let regs = vcpu_fd + .get_regs() + .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; + + Ok(regs.rip) + } + + /// This method sets the vCPU debug registers to enable breakpoints at + /// specific addresses and sets or clears the TF flag for single stepping + /// + /// The first 4 debug registers (DR0-DR3) are used to set the addresses + /// DR4 and DR5 are obsolete and not used + /// DR7 is used to enable the breakpoints + /// For more information see: DEBUG REGISTERS chapter in the architecture + /// manual + fn set_debug_config(&mut self, vcpu_fd: &VMProcessor, step: bool) -> Result<()> { + let addrs = &self.hw_breakpoints; + + let mut dbg_cfg = WHvDebugRegisters::default(); + + for (k, addr) in addrs.iter().enumerate() { + match k { + 0 => { + dbg_cfg.dr0 = *addr; + } + 1 => { + dbg_cfg.dr1 =
*addr; + } + 2 => { + dbg_cfg.dr2 = *addr; + } + 3 => { + dbg_cfg.dr3 = *addr; + } + _ => { + Err(new_error!("Tried to set more than 4 HW breakpoints"))?; + } + } + dbg_cfg.dr7 |= 1 << (k * 2); + } + + self.dbg_cfg = dbg_cfg; + + vcpu_fd + .set_debug_regs(&self.dbg_cfg) + .map_err(|e| new_error!("Could not set guest debug: {:?}", e))?; + + self.single_step = step; + + let mut regs = vcpu_fd + .get_regs() + .map_err(|e| new_error!("Could not get registers: {:?}", e))?; + + // Set TF Flag to enable Traps + if self.single_step { + regs.rflags |= 1 << 8; // Set the TF flag + } else { + regs.rflags &= !(1 << 8); // Clear the TF flag + } + + vcpu_fd + .set_general_purpose_registers(&regs) + .map_err(|e| new_error!("Could not set guest registers: {:?}", e))?; + + Ok(()) + } + + /// Get the reason the vCPU has stopped + pub(crate) fn get_stop_reason( + &mut self, + vcpu_fd: &VMProcessor, + exception: WHV_VP_EXCEPTION_CONTEXT, + entrypoint: u64, + ) -> Result<VcpuStopReason> { + let rip = self.get_instruction_pointer(vcpu_fd)?; + let rip = self.translate_gva(vcpu_fd, rip)?; + + let debug_regs = vcpu_fd + .get_debug_regs() + .map_err(|e| new_error!("Could not retrieve registers from vCPU: {:?}", e))?; + + // Check if the vCPU stopped because of a hardware breakpoint + let reason = vcpu_stop_reason( + self.single_step, + rip, + debug_regs.dr6, + entrypoint, + exception.ExceptionType as u32, + &self.hw_breakpoints, + &self.sw_breakpoints, + ); + + if let VcpuStopReason::EntryPointBp = reason { + // In case the hw breakpoint is the entry point, remove it to + // avoid hanging here as gdb does not remove breakpoints it + // has not set. + // Gdb expects the target to be stopped when connected.
+ self.remove_hw_breakpoint(vcpu_fd, entrypoint)?; + } + + Ok(reason) + } +} + +impl GuestDebug for HypervDebug { + type Vcpu = VMProcessor; + + fn is_hw_breakpoint(&self, addr: &u64) -> bool { + self.hw_breakpoints.contains(addr) + } + fn is_sw_breakpoint(&self, addr: &u64) -> bool { + self.sw_breakpoints.contains_key(addr) + } + fn save_hw_breakpoint(&mut self, addr: &u64) -> bool { + if self.hw_breakpoints.len() >= MAX_NO_OF_HW_BP { + false + } else { + self.hw_breakpoints.push(*addr); + + true + } + } + fn save_sw_breakpoint_data(&mut self, addr: u64, data: [u8; 1]) { + _ = self.sw_breakpoints.insert(addr, data); + } + fn delete_hw_breakpoint(&mut self, addr: &u64) { + self.hw_breakpoints.retain(|&a| a != *addr); + } + fn delete_sw_breakpoint_data(&mut self, addr: &u64) -> Option<[u8; 1]> { + self.sw_breakpoints.remove(addr) + } + + fn read_regs(&self, vcpu_fd: &Self::Vcpu, regs: &mut X86_64Regs) -> Result<()> { + log::debug!("Read registers"); + let vcpu_regs = vcpu_fd + .get_regs() + .map_err(|e| new_error!("Could not read guest registers: {:?}", e))?; + + regs.rax = vcpu_regs.rax; + regs.rbx = vcpu_regs.rbx; + regs.rcx = vcpu_regs.rcx; + regs.rdx = vcpu_regs.rdx; + regs.rsi = vcpu_regs.rsi; + regs.rdi = vcpu_regs.rdi; + regs.rbp = vcpu_regs.rbp; + regs.rsp = vcpu_regs.rsp; + regs.r8 = vcpu_regs.r8; + regs.r9 = vcpu_regs.r9; + regs.r10 = vcpu_regs.r10; + regs.r11 = vcpu_regs.r11; + regs.r12 = vcpu_regs.r12; + regs.r13 = vcpu_regs.r13; + regs.r14 = vcpu_regs.r14; + regs.r15 = vcpu_regs.r15; + + regs.rip = vcpu_regs.rip; + regs.rflags = vcpu_regs.rflags; + + Ok(()) + } + + fn set_single_step(&mut self, vcpu_fd: &Self::Vcpu, enable: bool) -> Result<()> { + self.set_debug_config(vcpu_fd, enable) + } + + fn translate_gva(&self, vcpu_fd: &Self::Vcpu, gva: u64) -> Result<u64> { + vcpu_fd + .translate_gva(gva) + .map_err(|_| HyperlightError::TranslateGuestAddress(gva)) + } + + fn write_regs(&self, vcpu_fd: &Self::Vcpu, regs: &X86_64Regs) -> Result<()> { +
log::debug!("Write registers"); + let regs = WHvGeneralRegisters { + rax: regs.rax, + rbx: regs.rbx, + rcx: regs.rcx, + rdx: regs.rdx, + rsi: regs.rsi, + rdi: regs.rdi, + rbp: regs.rbp, + rsp: regs.rsp, + r8: regs.r8, + r9: regs.r9, + r10: regs.r10, + r11: regs.r11, + r12: regs.r12, + r13: regs.r13, + r14: regs.r14, + r15: regs.r15, + + rip: regs.rip, + rflags: regs.rflags, + }; + + vcpu_fd + .set_general_purpose_registers(&regs) + .map_err(|e| new_error!("Could not write guest registers: {:?}", e)) + } +} diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index 47af50b40..3cc8ecd7c 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -16,6 +16,8 @@ limitations under the License. mod arch; mod event_loop; +#[cfg(target_os = "windows")] +mod hyperv_debug; #[cfg(kvm)] mod kvm_debug; #[cfg(mshv)] @@ -34,6 +36,8 @@ use gdbstub::conn::ConnectionExt; use gdbstub::stub::GdbStub; use gdbstub::target::TargetError; use hyperlight_common::mem::PAGE_SIZE; +#[cfg(target_os = "windows")] +pub(crate) use hyperv_debug::HypervDebug; #[cfg(kvm)] pub(crate) use kvm_debug::KvmDebug; #[cfg(mshv)] @@ -41,6 +45,7 @@ pub(crate) use mshv_debug::MshvDebug; use thiserror::Error; use x86_64_target::HyperlightSandboxTarget; +use super::InterruptHandle; use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; use crate::mem::layout::SandboxMemoryLayout; use crate::{HyperlightError, new_error}; @@ -147,6 +152,7 @@ pub(crate) enum DebugResponse { ErrorOccurred, GetCodeSectionOffset(u64), NotAllowed, + InterruptHandle(Arc<dyn InterruptHandle>), ReadAddr(Vec<u8>), ReadRegisters(X86_64Regs), RemoveHwBreakpoint(bool), @@ -158,7 +164,7 @@ pub(crate) enum DebugResponse { } /// This trait is used to define common debugging functionality for Hypervisors -pub(crate) trait GuestDebug { +pub(super) trait GuestDebug { /// Type that wraps the vCPU functionality type Vcpu; @@ -380,7 +386,6 @@ impl DebugCommChannel { ///
Creates a thread that handles gdb protocol pub(crate) fn create_gdb_thread( port: u16, - thread_id: u64, ) -> Result, GdbTargetError> { let (gdb_conn, hyp_conn) = DebugCommChannel::unbounded(); let socket = format!("localhost:{}", port); @@ -398,12 +403,23 @@ pub(crate) fn create_gdb_thread( let conn: Box> = Box::new(conn); let debugger = GdbStub::new(conn); - let mut target = HyperlightSandboxTarget::new(hyp_conn, thread_id); + let mut target = HyperlightSandboxTarget::new(hyp_conn); // Waits for vCPU to stop at entrypoint breakpoint - let res = target.recv()?; - if let DebugResponse::VcpuStopped(_) = res { + let msg = target.recv()?; + if let DebugResponse::InterruptHandle(handle) = msg { + log::info!("Received interrupt handle: {:?}", handle); + target.set_interrupt_handle(handle); + } else { + return Err(GdbTargetError::UnexpectedMessage); + } + + // Waits for vCPU to stop at entrypoint breakpoint + let msg = target.recv()?; + if let DebugResponse::VcpuStopped(_) = msg { event_loop_thread(debugger, &mut target); + } else { + return Err(GdbTargetError::UnexpectedMessage); } Ok(()) diff --git a/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs b/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs index 0c48f84e9..3248e6082 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/x86_64_target.rs @@ -14,6 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +use std::sync::Arc; + use crossbeam_channel::TryRecvError; use gdbstub::arch::Arch; use gdbstub::common::Signal; @@ -30,20 +32,21 @@ use gdbstub::target::{Target, TargetError, TargetResult}; use gdbstub_arch::x86::X86_64_SSE as GdbTargetArch; use super::{DebugCommChannel, DebugMsg, DebugResponse, GdbTargetError, X86_64Regs}; +use crate::hypervisor::InterruptHandle; /// Gdbstub target used by the gdbstub crate to provide GDB protocol implementation pub(crate) struct HyperlightSandboxTarget { /// Hypervisor communication channels hyp_conn: DebugCommChannel, - /// Thread ID - thread_id: u64, + /// Interrupt handle for the vCPU thread + interrupt_handle: Option>, } impl HyperlightSandboxTarget { - pub(crate) fn new(hyp_conn: DebugCommChannel, thread_id: u64) -> Self { + pub(crate) fn new(hyp_conn: DebugCommChannel) -> Self { HyperlightSandboxTarget { hyp_conn, - thread_id, + interrupt_handle: None, } } @@ -60,9 +63,9 @@ impl HyperlightSandboxTarget { self.hyp_conn.send(ev) } - /// Returns the thread ID - pub(crate) fn get_thread_id(&self) -> u64 { - self.thread_id + /// Set the interrupt handle for the vCPU thread + pub(crate) fn set_interrupt_handle(&mut self, handle: Arc) { + self.interrupt_handle = Some(handle); } /// Waits for a response over the communication channel @@ -113,6 +116,17 @@ impl HyperlightSandboxTarget { } } } + + /// Interrupts the vCPU execution + pub(crate) fn interrupt_vcpu(&mut self) -> bool { + if let Some(handle) = &self.interrupt_handle { + handle.kill_from_debugger() + } else { + log::warn!("No interrupt handle set, cannot interrupt vCPU"); + + false + } + } } impl Target for HyperlightSandboxTarget { @@ -464,7 +478,7 @@ mod tests { fn test_gdb_target() { let (gdb_conn, hyp_conn) = DebugCommChannel::unbounded(); - let mut target = HyperlightSandboxTarget::new(hyp_conn, 0); + let mut target = HyperlightSandboxTarget::new(hyp_conn); // Check response to read registers - send the response first to not be blocked // by the recv call in the 
target diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index bd52996d6..db3f81c9b 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -397,42 +397,53 @@ impl HypervLinuxDriver { Self::setup_initial_sregs(&mut vcpu_fd, pml4_ptr.absolute()?)?; - Ok(Self { + let interrupt_handle = Arc::new(LinuxInterruptHandle { + running: AtomicU64::new(0), + cancel_requested: AtomicBool::new(false), + #[cfg(gdb)] + debug_interrupt: AtomicBool::new(false), + #[cfg(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + ))] + tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), + #[cfg(not(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + )))] + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + dropped: AtomicBool::new(false), + }); + + #[allow(unused_mut)] + let mut hv = Self { _mshv: mshv, vm_fd, vcpu_fd, mem_regions, entrypoint: entrypoint_ptr.absolute()?, orig_rsp: rsp_ptr, - interrupt_handle: Arc::new(LinuxInterruptHandle { - running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), - #[cfg(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - ))] - tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), - #[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - )))] - tid: AtomicU64::new(unsafe { libc::pthread_self() }), - retry_delay: config.get_interrupt_retry_delay(), - sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), - dropped: AtomicBool::new(false), - }), - + interrupt_handle: interrupt_handle.clone(), #[cfg(gdb)] debug, #[cfg(gdb)] gdb_conn, 
#[cfg(crashdump)] rt_cfg, - }) + }; + + // If debugging is enabled, send the interrupt handle to the GDB thread so it can stop the vCPU + // (guarded so sandbox creation does not depend on a gdb connection being present) + #[cfg(gdb)] + if hv.debug.is_some() { hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; } + + Ok(hv) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -634,6 +645,14 @@ impl Hypervisor for HypervLinuxDriver { e ) })?; + #[cfg(not(gdb))] + let debug_interrupt = false; + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); + // Don't run the vcpu if `cancel_requested` is true // // Note: if a `InterruptHandle::kill()` called while this thread is **here** @@ -642,6 +661,7 @@ impl Hypervisor for HypervLinuxDriver { .interrupt_handle .cancel_requested .load(Ordering::Relaxed) + || debug_interrupt { Err(MshvError::Errno(vmm_sys_util::errno::Error::new( libc::EINTR, @@ -667,6 +687,11 @@ impl Hypervisor for HypervLinuxDriver { .interrupt_handle .cancel_requested .load(Ordering::Relaxed); + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); // Note: if a `InterruptHandle::kill()` called while this thread is **here** // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. // Additionally signals will be sent to this thread until `running` is set to false.
@@ -754,27 +779,23 @@ impl Hypervisor for HypervLinuxDriver { Err(e) => match e.errno() { // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of stale signal - // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or + // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it if cancel_requested { self.interrupt_handle .cancel_requested .store(false, Ordering::Relaxed); HyperlightExit::Cancelled() } else { - // In case of the gdb feature, if no cancellation was requested, - // and the debugging is enabled it means the vCPU was stopped because - // of an interrupt coming from the debugger thread #[cfg(gdb)] - if self.debug.is_some() { + if debug_interrupt { + self.interrupt_handle + .debug_interrupt + .store(false, Ordering::Relaxed); + // If the vCPU was stopped because of an interrupt, we need to // return a special exit reason so that the gdb thread can handle it // and resume execution - // NOTE: There is a chance that the vCPU was stopped because of a stale - // signal that was meant to be delivered to a previous/other vCPU on this - // same thread, however, we cannot distinguish between the two cases, so - // we assume that the vCPU was stopped because of an interrupt. 
- // This is fine, because the debugger will be notified about an interrupt HyperlightExit::Debug(VcpuStopReason::Interrupt) } else { HyperlightExit::Retry() diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index aac5594c6..0d6d794bc 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -30,10 +30,18 @@ use windows::Win32::System::Hypervisor::{ }; #[cfg(crashdump)] use {super::crashdump, std::path::Path}; +#[cfg(gdb)] +use { + super::gdb::{ + DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, HypervDebug, VcpuStopReason, + }, + super::handlers::DbgMemAccessHandlerWrapper, + crate::hypervisor::handlers::DbgMemAccessHandlerCaller, + crate::{HyperlightError, log_then_return}, + std::sync::Mutex, +}; use super::fpu::{FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; -#[cfg(gdb)] -use super::handlers::DbgMemAccessHandlerWrapper; use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; use super::surrogate_process::SurrogateProcess; use super::surrogate_process_manager::*; @@ -53,6 +61,211 @@ use crate::mem::ptr::{GuestPtr, RawPtr}; use crate::sandbox::uninitialized::SandboxRuntimeConfig; use crate::{Result, debug, new_error}; +#[cfg(gdb)] +mod debug { + use std::sync::{Arc, Mutex}; + + use windows::Win32::System::Hypervisor::WHV_VP_EXCEPTION_CONTEXT; + + use super::{HypervWindowsDriver, *}; + use crate::hypervisor::gdb::{DebugMsg, DebugResponse, VcpuStopReason, X86_64Regs}; + use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; + use crate::{Result, new_error}; + + impl HypervWindowsDriver { + /// Resets the debug information to disable debugging + fn disable_debug(&mut self) -> Result<()> { + let mut debug = HypervDebug::default(); + + debug.set_single_step(&self.processor, false)?; + + self.debug = Some(debug); + + Ok(()) + } + + /// Get the reason the vCPU has stopped + pub(crate) fn get_stop_reason( + &mut self, + 
exception: WHV_VP_EXCEPTION_CONTEXT, + ) -> Result { + let debug = self + .debug + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + debug.get_stop_reason(&self.processor, exception, self.entrypoint) + } + + pub(crate) fn process_dbg_request( + &mut self, + req: DebugMsg, + dbg_mem_access_fn: Arc>, + ) -> Result { + if let Some(debug) = self.debug.as_mut() { + match req { + DebugMsg::AddHwBreakpoint(addr) => Ok(DebugResponse::AddHwBreakpoint( + debug + .add_hw_breakpoint(&self.processor, addr) + .map_err(|e| { + log::error!("Failed to add hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::AddSwBreakpoint(addr) => Ok(DebugResponse::AddSwBreakpoint( + debug + .add_sw_breakpoint(&self.processor, addr, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to add sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Continue => { + debug.set_single_step(&self.processor, false).map_err(|e| { + log::error!("Failed to continue execution: {:?}", e); + + e + })?; + + Ok(DebugResponse::Continue) + } + DebugMsg::DisableDebug => { + self.disable_debug().map_err(|e| { + log::error!("Failed to disable debugging: {:?}", e); + + e + })?; + + Ok(DebugResponse::DisableDebug) + } + DebugMsg::GetCodeSectionOffset => { + let offset = dbg_mem_access_fn + .try_lock() + .map_err(|e| { + new_error!("Error locking at {}:{}: {}", file!(), line!(), e) + })? 
+ .get_code_offset() + .map_err(|e| { + log::error!("Failed to get code offset: {:?}", e); + + e + })?; + + Ok(DebugResponse::GetCodeSectionOffset(offset as u64)) + } + DebugMsg::ReadAddr(addr, len) => { + let mut data = vec![0u8; len]; + + debug + .read_addrs(&self.processor, addr, &mut data, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to read from address: {:?}", e); + + e + })?; + + Ok(DebugResponse::ReadAddr(data)) + } + DebugMsg::ReadRegisters => { + let mut regs = X86_64Regs::default(); + + debug + .read_regs(&self.processor, &mut regs) + .map_err(|e| { + log::error!("Failed to read registers: {:?}", e); + + e + }) + .map(|_| DebugResponse::ReadRegisters(regs)) + } + DebugMsg::RemoveHwBreakpoint(addr) => Ok(DebugResponse::RemoveHwBreakpoint( + debug + .remove_hw_breakpoint(&self.processor, addr) + .map_err(|e| { + log::error!("Failed to remove hw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::RemoveSwBreakpoint(addr) => Ok(DebugResponse::RemoveSwBreakpoint( + debug + .remove_sw_breakpoint(&self.processor, addr, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to remove sw breakpoint: {:?}", e); + + e + }) + .is_ok(), + )), + DebugMsg::Step => { + debug.set_single_step(&self.processor, true).map_err(|e| { + log::error!("Failed to enable step instruction: {:?}", e); + + e + })?; + + Ok(DebugResponse::Step) + } + DebugMsg::WriteAddr(addr, data) => { + debug + .write_addrs(&self.processor, addr, &data, dbg_mem_access_fn) + .map_err(|e| { + log::error!("Failed to write to address: {:?}", e); + + e + })?; + + Ok(DebugResponse::WriteAddr) + } + DebugMsg::WriteRegisters(regs) => debug + .write_regs(&self.processor, &regs) + .map_err(|e| { + log::error!("Failed to write registers: {:?}", e); + + e + }) + .map(|_| DebugResponse::WriteRegisters), + } + } else { + Err(new_error!("Debugging is not enabled")) + } + } + + pub(crate) fn recv_dbg_msg(&mut self) -> Result<DebugMsg> { + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(||
new_error!("Debug is not enabled"))?; + + gdb_conn.recv().map_err(|e| { + new_error!( + "Got an error while waiting to receive a + message: {:?}", + e + ) + }) + } + + pub(crate) fn send_dbg_msg(&mut self, cmd: DebugResponse) -> Result<()> { + log::debug!("Sending {:?}", cmd); + + let gdb_conn = self + .gdb_conn + .as_mut() + .ok_or_else(|| new_error!("Debug is not enabled"))?; + + gdb_conn + .send(cmd) + .map_err(|e| new_error!("Got an error while sending a response message {:?}", e)) + } + } +} + /// A Hypervisor driver for HyperV-on-Windows. pub(crate) struct HypervWindowsDriver { processor: VMProcessor, @@ -61,6 +274,10 @@ pub(crate) struct HypervWindowsDriver { orig_rsp: GuestPtr, mem_regions: Vec, interrupt_handle: Arc, + #[cfg(gdb)] + debug: Option, + #[cfg(gdb)] + gdb_conn: Option>, #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, } @@ -82,6 +299,7 @@ impl HypervWindowsDriver { entrypoint: u64, rsp: u64, mmap_file_handle: HandleWrapper, + #[cfg(gdb)] gdb_conn: Option>, #[cfg(crashdump)] rt_cfg: SandboxRuntimeConfig, ) -> Result { // create and setup hypervisor partition @@ -100,21 +318,47 @@ impl HypervWindowsDriver { Self::setup_initial_sregs(&mut proc, pml4_address)?; let partition_handle = proc.get_partition_hdl(); - Ok(Self { + #[cfg(gdb)] + let (debug, gdb_conn) = if let Some(gdb_conn) = gdb_conn { + let mut debug = HypervDebug::new(); + debug.add_hw_breakpoint(&proc, entrypoint)?; + + (Some(debug), Some(gdb_conn)) + } else { + (None, None) + }; + + let interrupt_handle = Arc::new(WindowsInterruptHandle { + running: AtomicBool::new(false), + cancel_requested: AtomicBool::new(false), + #[cfg(gdb)] + debug_interrupt: AtomicBool::new(false), + partition_handle, + dropped: AtomicBool::new(false), + }); + + #[allow(unused_mut)] + let mut hv = Self { processor: proc, _surrogate_process: surrogate_process, entrypoint, orig_rsp: GuestPtr::try_from(RawPtr::from(rsp))?, mem_regions, - interrupt_handle: Arc::new(WindowsInterruptHandle { - running: 
AtomicBool::new(false), - cancel_requested: AtomicBool::new(false), - partition_handle, - dropped: AtomicBool::new(false), - }), + interrupt_handle: interrupt_handle.clone(), + #[cfg(gdb)] + debug, + #[cfg(gdb)] + gdb_conn, #[cfg(crashdump)] rt_cfg, - }) + }; + + // If debugging is enabled, send the interrupt handle to the GDB thread so it can stop the vCPU + // (guarded: send_dbg_msg fails when there is no gdb connection, which would abort sandbox creation) + #[cfg(gdb)] + if hv.debug.is_some() { hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; } + + Ok(hv) } fn setup_initial_sregs(proc: &mut VMProcessor, _pml4_addr: u64) -> Result<()> { @@ -424,11 +668,20 @@ impl Hypervisor for HypervWindowsDriver { fn run(&mut self) -> Result<HyperlightExit> { self.interrupt_handle.running.store(true, Ordering::Relaxed); + #[cfg(not(gdb))] + let debug_interrupt = false; + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); + // Don't run the vcpu if `cancel_requested` is true let exit_context = if self .interrupt_handle .cancel_requested .load(Ordering::Relaxed) + || debug_interrupt { WHV_RUN_VP_EXIT_CONTEXT { ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled @@ -446,6 +699,12 @@ impl Hypervisor for HypervWindowsDriver { .running .store(false, Ordering::Relaxed); + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); + let result = match exit_context.ExitReason { // WHvRunVpExitReasonX64IoPortAccess WHV_RUN_VP_EXIT_REASON(2i32) => { @@ -501,8 +760,35 @@ impl Hypervisor for HypervWindowsDriver { // This will happen when guest code runs for too long WHV_RUN_VP_EXIT_REASON(8193i32) => { debug!("HyperV Cancelled Details :\n {:#?}", &self); + #[cfg(gdb)] + if debug_interrupt { + self.interrupt_handle + .debug_interrupt + .store(false, Ordering::Relaxed); + + // If the vCPU was stopped because of an interrupt, we need to + // return a special exit reason so that the gdb thread can handle it + // and resume execution +
HyperlightExit::Debug(VcpuStopReason::Interrupt) + } else { + HyperlightExit::Cancelled() + } + + #[cfg(not(gdb))] HyperlightExit::Cancelled() } + #[cfg(gdb)] + WHV_RUN_VP_EXIT_REASON(4098i32) => { + // Get information about the exception that triggered the exit + let exception = unsafe { exit_context.Anonymous.VpException }; + + match self.get_stop_reason(exception) { + Ok(reason) => HyperlightExit::Debug(reason), + Err(e) => { + log_then_return!("Error getting stop reason: {}", e); + } + } + } WHV_RUN_VP_EXIT_REASON(_) => { debug!( "HyperV Unexpected Exit Details :#nReason {:#?}\n {:#?}", @@ -584,6 +870,139 @@ impl Hypervisor for HypervWindowsDriver { Ok(None) } } + + #[cfg(gdb)] + fn handle_debug( + &mut self, + dbg_mem_access_fn: Arc>, + stop_reason: super::gdb::VcpuStopReason, + ) -> Result<()> { + if self.debug.is_none() { + return Err(new_error!("Debugging is not enabled")); + } + match stop_reason { + // If the vCPU stopped because of a crash, we need to handle it differently + // We do not want to allow resuming execution or placing breakpoints + // because the guest has crashed. 
+ // We only allow reading registers and memory + VcpuStopReason::Crash => { + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) + .map_err(|e| { + new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) + })?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + // Flag to store if we should deny continue or step requests + let mut deny_continue = false; + // Flag to store if we should detach from the gdb session + let mut detach = false; + + let response = match req { + // Allow the detach request to disable debugging by continuing resuming + // hypervisor crash error reporting + DebugMsg::DisableDebug => { + detach = true; + DebugResponse::DisableDebug + } + // Do not allow continue or step requests + DebugMsg::Continue | DebugMsg::Step => { + deny_continue = true; + DebugResponse::NotAllowed + } + // Do not allow adding/removing breakpoints and writing to memory or registers + DebugMsg::AddHwBreakpoint(_) + | DebugMsg::AddSwBreakpoint(_) + | DebugMsg::RemoveHwBreakpoint(_) + | DebugMsg::RemoveSwBreakpoint(_) + | DebugMsg::WriteAddr(_, _) + | DebugMsg::WriteRegisters(_) => DebugResponse::NotAllowed, + + // For all other requests, we will process them normally + _ => { + let result = self.process_dbg_request(req, dbg_mem_access_fn.clone()); + match result { + Ok(response) => response, + Err(HyperlightError::TranslateGuestAddress(_)) => { + // Treat non fatal errors separately so the guest doesn't fail + DebugResponse::ErrorOccurred + } + Err(e) => { + log::error!("Error processing debug request: {:?}", e); + return Err(e); + } + } + } + }; + + // Send the response to the request back to gdb + self.send_dbg_msg(response) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + + // If we are denying continue or step requests, the debugger assumes the + // execution started so we need to report a stop reason as a crash and let + // it request 
to read registers/memory to figure out what happened + if deny_continue { + self.send_dbg_msg(DebugResponse::VcpuStopped(VcpuStopReason::Crash)) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + } + + // If we are detaching, we will break the loop and the Hypervisor will continue + // to handle the Crash reason + if detach { + break; + } + } + } + + // If the vCPU stopped because of any other reason except a crash, we can handle it + // normally + _ => { + self.send_dbg_msg(DebugResponse::VcpuStopped(stop_reason)) + .map_err(|e| { + new_error!("Couldn't signal vCPU stopped event to GDB thread: {:?}", e) + })?; + + loop { + log::debug!("Debug wait for event to resume vCPU"); + + // Wait for a message from gdb + let req = self.recv_dbg_msg()?; + + let result = self.process_dbg_request(req, dbg_mem_access_fn.clone()); + + let response = match result { + Ok(response) => response, + // Treat non fatal errors separately so the guest doesn't fail + Err(HyperlightError::TranslateGuestAddress(_)) => { + DebugResponse::ErrorOccurred + } + Err(e) => { + return Err(e); + } + }; + + // If the command was either step or continue, we need to run the vcpu + let cont = matches!( + response, + DebugResponse::Step | DebugResponse::Continue | DebugResponse::DisableDebug + ); + + self.send_dbg_msg(response) + .map_err(|e| new_error!("Couldn't send response to gdb: {:?}", e))?; + + if cont { + break; + } + } + } + } + + Ok(()) + } } impl Drop for HypervWindowsDriver { @@ -592,10 +1011,14 @@ impl Drop for HypervWindowsDriver { } } +#[derive(Debug)] pub struct WindowsInterruptHandle { // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. 
running: AtomicBool, cancel_requested: AtomicBool, + // This is used to signal the GDB thread to stop the vCPU + #[cfg(gdb)] + debug_interrupt: AtomicBool, partition_handle: WHV_PARTITION_HANDLE, dropped: AtomicBool, } @@ -606,6 +1029,12 @@ impl InterruptHandle for WindowsInterruptHandle { self.running.load(Ordering::Relaxed) && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } + #[cfg(gdb)] + fn kill_from_debugger(&self) -> bool { + self.debug_interrupt.store(true, Ordering::Relaxed); + self.running.load(Ordering::Relaxed) + && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index 4bc17035b..d6b348cc0 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -351,35 +351,39 @@ impl KVMDriver { let rsp_gp = GuestPtr::try_from(RawPtr::from(rsp))?; - let ret = Self { + let interrupt_handle = Arc::new(LinuxInterruptHandle { + running: AtomicU64::new(0), + cancel_requested: AtomicBool::new(false), + #[cfg(gdb)] + debug_interrupt: AtomicBool::new(false), + #[cfg(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + ))] + tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), + #[cfg(not(all( + target_arch = "x86_64", + target_vendor = "unknown", + target_os = "linux", + target_env = "musl" + )))] + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + dropped: AtomicBool::new(false), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + }); + + #[allow(unused_mut)] + let mut hv = Self { _kvm: kvm, _vm_fd: vm_fd, vcpu_fd, entrypoint, orig_rsp: rsp_gp, mem_regions, - interrupt_handle: Arc::new(LinuxInterruptHandle { - running: AtomicU64::new(0), - cancel_requested: 
AtomicBool::new(false), - #[cfg(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - ))] - tid: AtomicU64::new(unsafe { libc::pthread_self() as u64 }), - #[cfg(not(all( - target_arch = "x86_64", - target_vendor = "unknown", - target_os = "linux", - target_env = "musl" - )))] - tid: AtomicU64::new(unsafe { libc::pthread_self() }), - retry_delay: config.get_interrupt_retry_delay(), - dropped: AtomicBool::new(false), - sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), - }), - + interrupt_handle: interrupt_handle.clone(), #[cfg(gdb)] debug, #[cfg(gdb)] @@ -387,7 +391,13 @@ impl KVMDriver { #[cfg(crashdump)] rt_cfg, }; - Ok(ret) + + // Send the interrupt handle to the GDB thread if debugging is enabled + // This is used to allow the GDB thread to stop the vCPU + #[cfg(gdb)] + hv.send_dbg_msg(DebugResponse::InterruptHandle(interrupt_handle))?; + + Ok(hv) } #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -564,6 +574,13 @@ impl Hypervisor for KVMDriver { e ) })?; + #[cfg(not(gdb))] + let debug_interrupt = false; + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); // Don't run the vcpu if `cancel_requested` is true // // Note: if a `InterruptHandle::kill()` called while this thread is **here** @@ -572,6 +589,7 @@ impl Hypervisor for KVMDriver { .interrupt_handle .cancel_requested .load(Ordering::Relaxed) + || debug_interrupt { Err(kvm_ioctls::Error::new(libc::EINTR)) } else { @@ -585,12 +603,15 @@ impl Hypervisor for KVMDriver { // Note: if a `InterruptHandle::kill()` called while this thread is **here** // Then signals will be sent to this thread until `running` is set to false. // This is fine since the signal handler is a no-op. 
- #[allow(unused_variables)] - // The variable is only used when `cfg(not(gdb))`, but the flag needs to be reset always anyway let cancel_requested = self .interrupt_handle .cancel_requested .load(Ordering::Relaxed); + #[cfg(gdb)] + let debug_interrupt = self + .interrupt_handle + .debug_interrupt + .load(Ordering::Relaxed); // Note: if a `InterruptHandle::kill()` called while this thread is **here** // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. // Additionally signals will be sent to this thread until `running` is set to false. @@ -646,24 +667,20 @@ impl Hypervisor for KVMDriver { Err(e) => match e.errno() { // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of stale signal - // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // If cancellation was not requested for this specific vm, the vcpu was interrupted because of a debug interrupt or + // a stale signal that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it if cancel_requested { self.interrupt_handle .cancel_requested .store(false, Ordering::Relaxed); HyperlightExit::Cancelled() } else { - // In case of the gdb feature, if no cancellation was requested, - // and the debugging is enabled it means the vCPU was stopped because - // of an interrupt coming from the debugger thread - // NOTE: There is a chance that the vCPU was stopped because of a stale - // signal that was meant to be delivered to a previous/other vCPU on this - // same thread, however, we cannot distinguish between the two cases, so - // we assume that the vCPU was stopped because of an interrupt. 
- // This is fine, because the debugger will be notified about an interrupt #[cfg(gdb)] - if self.debug.is_some() { + if debug_interrupt { + self.interrupt_handle + .debug_interrupt + .store(false, Ordering::Relaxed); + // If the vCPU was stopped because of an interrupt, we need to // return a special exit reason so that the gdb thread can handle it // and resume execution diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index f8fa29f51..0a31ee468 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -336,7 +336,7 @@ impl VirtualCPU { } /// A trait for handling interrupts to a sandbox's vcpu -pub trait InterruptHandle: Send + Sync { +pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. @@ -348,7 +348,19 @@ pub trait InterruptHandle: Send + Sync { /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. fn kill(&self) -> bool; - /// Returns true iff the corresponding sandbox has been dropped + /// Used by a debugger to interrupt the corresponding sandbox from running. + /// + /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. + /// - If this is called while the vcpu is not running, (for example during a host call), the + /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** + /// it's scheduled, and returns `false`. + /// + /// # Note + /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + #[cfg(gdb)] + fn kill_from_debugger(&self) -> bool; + + /// Returns true if the corresponding sandbox has been dropped fn dropped(&self) -> bool; } @@ -383,6 +395,12 @@ pub(super) struct LinuxInterruptHandle { /// 2. 
ensure that if a vm is killed while a host call is running, /// the vm will not re-enter the guest after the host call returns. cancel_requested: AtomicBool, + /// True when the debugger has requested the VM to be interrupted. Set immediately when + /// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running. + /// This is used to make sure stale signals do not interrupt the wrong vcpu + /// (a vcpu may only be interrupted by a debugger if `debug_interrupt` is true). + #[cfg(gdb)] + debug_interrupt: AtomicBool, /// Whether the corresponding vm is dropped dropped: AtomicBool, /// Retry delay between signals sent to the vcpu thread @@ -421,13 +439,8 @@ impl LinuxInterruptHandle { let generation = raw & !Self::RUNNING_BIT; (running, generation) } -} - -#[cfg(any(kvm, mshv))] -impl InterruptHandle for LinuxInterruptHandle { - fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); + fn send_signal(&self) -> bool { let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; let mut sent_signal = false; let mut target_generation: Option = None; @@ -456,6 +469,20 @@ impl InterruptHandle for LinuxInterruptHandle { sent_signal } +} + +#[cfg(any(kvm, mshv))] +impl InterruptHandle for LinuxInterruptHandle { + fn kill(&self) -> bool { + self.cancel_requested.store(true, Ordering::Relaxed); + + self.send_signal() + } + #[cfg(gdb)] + fn kill_from_debugger(&self) -> bool { + self.debug_interrupt.store(true, Ordering::Relaxed); + self.send_signal() + } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } diff --git a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs index 43ce7be3d..d6064443b 100644 --- a/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs +++ b/src/hyperlight_host/src/hypervisor/windows_hypervisor_platform.rs @@ -27,6 +27,8 @@ use windows_result::HRESULT; use 
super::surrogate_process::SurrogateProcess; #[cfg(crashdump)] use crate::HyperlightError; +#[cfg(gdb)] +use crate::hypervisor::wrappers::WHvDebugRegisters; use crate::hypervisor::wrappers::{WHvFPURegisters, WHvGeneralRegisters, WHvSpecialRegisters}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::{Result, new_error}; @@ -61,10 +63,16 @@ pub(crate) fn is_hypervisor_present() -> bool { pub(super) struct VMPartition(WHV_PARTITION_HANDLE); impl VMPartition { + /// This is the position of the extended vm exit in partition property + #[cfg(gdb)] + const EXTENDED_VM_EXIT_POS: u32 = 2; + #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn new(proc_count: u32) -> Result { let hdl = unsafe { WHvCreatePartition() }?; Self::set_processor_count(&hdl, proc_count)?; + #[cfg(gdb)] + Self::set_extended_vm_exits(&hdl)?; unsafe { WHvSetupPartition(hdl) }?; Ok(Self(hdl)) } @@ -86,6 +94,56 @@ impl VMPartition { Ok(()) } + /// Sets up the debugging exception interception for the partition + /// This is needed for a HyperV partition to be able to intercept debug traps and breakpoints + /// Steps: + /// - set the extended VM exits property to enable extended VM exits + /// - set the exception exit bitmap to include debug trap and breakpoint trap + #[cfg(gdb)] + #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] + pub fn set_extended_vm_exits(partition_handle: &WHV_PARTITION_HANDLE) -> Result<()> { + let mut property: WHV_PARTITION_PROPERTY = Default::default(); + + // Set the extended VM exits property + property.ExtendedVmExits.AsUINT64 = 1 << Self::EXTENDED_VM_EXIT_POS; + Self::set_property( + partition_handle, + WHvPartitionPropertyCodeExtendedVmExits, + &property, + )?; + + // Set the exception exit bitmap to include debug trap and breakpoint trap + property = Default::default(); + property.ExceptionExitBitmap = (1 << WHvX64ExceptionTypeDebugTrapOrFault.0) + | (1 << 
WHvX64ExceptionTypeBreakpointTrap.0); + Self::set_property( + partition_handle, + WHvPartitionPropertyCodeExceptionExitBitmap, + &property, + )?; + + Ok(()) + } + + /// Helper function to set partition properties + #[cfg(gdb)] + fn set_property( + partition_handle: &WHV_PARTITION_HANDLE, + property_code: WHV_PARTITION_PROPERTY_CODE, + property: &WHV_PARTITION_PROPERTY, + ) -> Result<()> { + unsafe { + WHvSetPartitionProperty( + *partition_handle, + property_code, + property as *const _ as *const c_void, + std::mem::size_of::() as u32, + )?; + } + + Ok(()) + } + #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn map_gpa_range( &mut self, @@ -206,7 +264,7 @@ impl Drop for VMPartition { } #[derive(Debug)] -pub(super) struct VMProcessor(VMPartition); +pub(crate) struct VMProcessor(VMPartition); impl VMProcessor { #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn new(part: VMPartition) -> Result { @@ -214,6 +272,31 @@ impl VMProcessor { Ok(Self(part)) } + /// This function is used to translate a guest virtual address to a guest physical address + #[cfg(gdb)] + pub(super) fn translate_gva(&self, gva: u64) -> Result { + let partition_handle = self.get_partition_hdl(); + let mut gpa = 0; + let mut result = WHV_TRANSLATE_GVA_RESULT::default(); + + // Only validate read access because the write access is handled through the + // host memory mapping + let translateflags = WHvTranslateGvaFlagValidateRead; + + unsafe { + WHvTranslateGva( + partition_handle, + 0, + gva, + translateflags, + &mut result, + &mut gpa, + )?; + } + + Ok(gpa) + } + #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(super) fn get_partition_hdl(&self) -> WHV_PARTITION_HANDLE { let part = &self.0; @@ -222,7 +305,7 @@ impl VMProcessor { #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(super) fn set_registers( - &mut self, + &self, registers: &[(WHV_REGISTER_NAME, 
WHV_REGISTER_VALUE)], ) -> Result<()> { let partition_handle = self.get_partition_hdl(); @@ -308,10 +391,7 @@ impl VMProcessor { // Sets the registers for the VMProcessor to the given general purpose registers. // If you want to set other registers, use `set_registers` instead. - pub(super) fn set_general_purpose_registers( - &mut self, - regs: &WHvGeneralRegisters, - ) -> Result<()> { + pub(super) fn set_general_purpose_registers(&self, regs: &WHvGeneralRegisters) -> Result<()> { const LEN: usize = 18; let names: [WHV_REGISTER_NAME; LEN] = [ @@ -477,7 +557,54 @@ impl VMProcessor { Ok(xsave_buffer) } - pub(super) fn set_fpu(&mut self, regs: &WHvFPURegisters) -> Result<()> { + #[cfg(gdb)] + pub(super) fn set_debug_regs(&self, regs: &WHvDebugRegisters) -> Result<()> { + let registers = vec![ + (WHvX64RegisterDr0, WHV_REGISTER_VALUE { Reg64: regs.dr0 }), + (WHvX64RegisterDr1, WHV_REGISTER_VALUE { Reg64: regs.dr1 }), + (WHvX64RegisterDr2, WHV_REGISTER_VALUE { Reg64: regs.dr2 }), + (WHvX64RegisterDr3, WHV_REGISTER_VALUE { Reg64: regs.dr3 }), + (WHvX64RegisterDr6, WHV_REGISTER_VALUE { Reg64: regs.dr6 }), + (WHvX64RegisterDr7, WHV_REGISTER_VALUE { Reg64: regs.dr7 }), + ]; + + self.set_registers(&registers) + } + + #[cfg(gdb)] + pub(super) fn get_debug_regs(&self) -> Result<WHvDebugRegisters> { + const LEN: usize = 6; + + let names: [WHV_REGISTER_NAME; LEN] = [ + WHvX64RegisterDr0, + WHvX64RegisterDr1, + WHvX64RegisterDr2, + WHvX64RegisterDr3, + WHvX64RegisterDr6, + WHvX64RegisterDr7, + ]; + + let mut out: [WHV_REGISTER_VALUE; LEN] = unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.get_partition_hdl(), + 0, + names.as_ptr(), + LEN as u32, + out.as_mut_ptr(), + )?; + Ok(WHvDebugRegisters { + dr0: out[0].Reg64, + dr1: out[1].Reg64, + dr2: out[2].Reg64, + dr3: out[3].Reg64, + dr6: out[4].Reg64, + dr7: out[5].Reg64, + }) + } + } + + pub(super) fn set_fpu(&self, regs: &WHvFPURegisters) -> Result<()> { const LEN: usize = 26; let names: [WHV_REGISTER_NAME; LEN] = [ 
diff --git a/src/hyperlight_host/src/hypervisor/wrappers.rs b/src/hyperlight_host/src/hypervisor/wrappers.rs index 58fe1f33c..4a2a1ff8b 100644 --- a/src/hyperlight_host/src/hypervisor/wrappers.rs +++ b/src/hyperlight_host/src/hypervisor/wrappers.rs @@ -80,6 +80,18 @@ pub(super) struct WHvGeneralRegisters { pub rflags: u64, } +/// only used on Windows for handling debug registers with the VMProcessor +#[cfg(gdb)] +#[derive(Debug, Default, Copy, Clone, PartialEq)] +pub(super) struct WHvDebugRegisters { + pub dr0: u64, + pub dr1: u64, + pub dr2: u64, + pub dr3: u64, + pub dr6: u64, + pub dr7: u64, +} + #[derive(Debug, Default, Copy, Clone, PartialEq)] pub(super) struct WHvFPURegisters { pub xmm0: u128, diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index d5c9d2d72..a37f747e2 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -193,7 +193,7 @@ pub(crate) fn set_up_hypervisor_partition( let gdb_conn = if let Some(DebugInfo { port }) = rt_cfg.debug_info { use crate::hypervisor::gdb::create_gdb_thread; - let gdb_conn = create_gdb_thread(port, unsafe { libc::pthread_self() }); + let gdb_conn = create_gdb_thread(port); // in case the gdb thread creation fails, we still want to continue // without gdb @@ -256,6 +256,8 @@ pub(crate) fn set_up_hypervisor_partition( entrypoint_ptr.absolute()?, rsp_ptr.absolute()?, HandleWrapper::from(mmap_file_handle), + #[cfg(gdb)] + gdb_conn, #[cfg(crashdump)] rt_cfg.clone(), )?;