Skip to content

Commit 2bda460

Browse files
authored
implement poisoned sandbox support (#292)
Signed-off-by: Simon Davies <[email protected]>
1 parent b8c831a commit 2bda460

File tree

5 files changed

+421
-1
lines changed

5 files changed

+421
-1
lines changed

Justfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ examples-ci target=default-target features="": (build-rust-wasm-examples target)
9595
cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example helloworld
9696
cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example hostfuncs
9797
cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example rust_wasm_examples
98+
cargo run {{ if features =="" {''} else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example interruption
9899
cargo run {{ if features =="" {''} else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example metrics
99100
cargo run {{ if features =="" {"--no-default-features --features kvm,mshv3"} else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" {"dev"} else { target } }} --example metrics
100101

src/hyperlight_wasm/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ name = "tracing-otlp"
5454
path = "examples/tracing-otlp/main.rs"
5555
test = true
5656

57+
[[example]]
58+
name = "interruption"
59+
path = "examples/interruption/main.rs"
60+
test = true
61+
5762
[dependencies]
5863
hyperlight-host = { workspace = true }
5964
libc = { version = "0.2.178" }

src/hyperlight_wasm/examples/interruption/main.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
Copyright 2024 The Hyperlight Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
//! This example demonstrates how to:
18+
//! 1. Get an interrupt handle for a sandbox
19+
//! 2. Interrupt long-running guest code from another thread
20+
//! 3. Detect when a sandbox is poisoned
21+
//! 4. Recover a poisoned sandbox using `restore()` or `unload_module()`
22+
23+
use std::thread;
24+
use std::time::Duration;
25+
26+
use examples_common::get_wasm_module_path;
27+
use hyperlight_wasm::{HyperlightError, Result, SandboxBuilder};
28+
29+
/// Host function that `main` registers under the name `GetTimeSinceBootMicrosecond`.
///
/// Despite the name, the value is the number of microseconds elapsed since the
/// Unix epoch according to the host's system clock, not the time since boot.
///
/// # Errors
///
/// Propagates (via `?`) the error from `duration_since` when the system clock
/// reads earlier than the Unix epoch, and returns
/// `HyperlightError::IntConversionFailure` when the microsecond count does not
/// fit in an `i64`.
fn get_time_since_boot_microsecond() -> Result<i64> {
30+
let res = std::time::SystemTime::now()
31+
.duration_since(std::time::SystemTime::UNIX_EPOCH)?
32+
.as_micros();
33+
// `as_micros` yields a `u128`, so narrow it with a checked conversion rather than `as`.
i64::try_from(res).map_err(HyperlightError::IntConversionFailure)
34+
}
35+
36+
/// Walks through the interruption and recovery flow end to end:
/// load `RunWasm.aot`, snapshot it, interrupt a long-running guest call from a
/// background thread, observe the poisoned state, recover with `restore()`,
/// poison the sandbox a second time, and recover with `unload_module()` before
/// loading `HelloWorld.aot`.
///
/// Any unexpected outcome either panics or is returned to the caller through `?`.
fn main() -> Result<()> {
37+
println!("=== Hyperlight-Wasm Interruption Example ===\n");
38+
39+
// Build a sandbox and register host functions
40+
let mut sandbox = SandboxBuilder::new().build()?;
41+
sandbox.register(
42+
"GetTimeSinceBootMicrosecond",
43+
get_time_since_boot_microsecond,
44+
)?;
45+
46+
let wasm_sandbox = sandbox.load_runtime()?;
47+
let mod_path = get_wasm_module_path("RunWasm.aot")?;
48+
let mut loaded = wasm_sandbox.load_module(mod_path)?;
49+
50+
println!("1. Sandbox created and module loaded");
51+
assert!(!loaded.is_poisoned()?);
52+
println!(" is_poisoned: {}", loaded.is_poisoned()?);
53+
54+
// Take a snapshot before we do anything
55+
let snapshot = loaded.snapshot()?;
56+
println!("2. Snapshot taken for later recovery\n");
57+
58+
// Get an interrupt handle - this can be sent to another thread
59+
let interrupt = loaded.interrupt_handle()?;
60+
println!("3. Interrupt handle obtained\n");
61+
62+
// Spawn a thread that will interrupt the guest after 1 second
63+
println!("4. Starting long-running guest function...");
64+
println!(" (A background thread will interrupt it after 1 second)\n");
65+
66+
// The `move` closure takes ownership of `interrupt`. The JoinHandle returned by
// `thread::spawn` is not kept, so the thread runs detached and is never joined.
thread::spawn(move || {
67+
thread::sleep(Duration::from_secs(1));
68+
println!(" [Background thread] Calling interrupt.kill()...");
69+
interrupt.kill();
70+
});
71+
72+
// Call a long-running guest function that will be interrupted
73+
let result = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
74+
75+
// Only `ExecutionCanceledByHost` is accepted here; a successful return or any
// other error aborts the example with a panic.
match result {
76+
Ok(_) => panic!(" Guest function completed (unexpected!)"),
77+
Err(HyperlightError::ExecutionCanceledByHost()) => {
78+
println!(" Guest function was interrupted (ExecutionCanceledByHost)");
79+
}
80+
Err(e) => panic!(" Unexpected error: {:?}", e),
81+
}
82+
83+
println!("\n5. Checking sandbox state after interruption:");
84+
// NOTE(review): the poisoned flag is only printed at this step, not asserted;
// the failure of the next call is what actually verifies it.
println!(" is_poisoned: {}", loaded.is_poisoned()?);
85+
86+
// Demonstrate that calling a poisoned sandbox fails
87+
println!("\n6. Attempting to call guest function on poisoned sandbox...");
88+
let result = loaded.call_guest_function::<i32>("CalcFib", 10i32);
89+
90+
match result {
91+
Ok(_) => panic!(" Call succeeded (unexpected!)"),
92+
Err(HyperlightError::PoisonedSandbox) => {
93+
println!(" Call failed with PoisonedSandbox error (expected)");
94+
}
95+
Err(e) => panic!(" Unexpected error: {:?}", e),
96+
}
97+
98+
// Recovery option 1: Use restore() to recover the sandbox
99+
println!("\n7. Recovering sandbox using restore()...");
100+
loaded.restore(&snapshot)?;
101+
assert!(!loaded.is_poisoned()?);
102+
println!(" is_poisoned after restore: {}", loaded.is_poisoned()?);
103+
104+
// Now we can call guest functions again
105+
println!("\n8. Calling guest function after recovery...");
106+
let result: i32 = loaded.call_guest_function("CalcFib", 10i32)?;
107+
println!(" CalcFib(10) returned: {} (expected 55)", result);
108+
109+
// Demonstrate recovery option 2: unload_module
110+
println!("\n9. Demonstrating unload_module recovery...");
111+
112+
// First, poison the sandbox again
113+
// A fresh handle is requested because the first one was moved into the earlier thread.
let interrupt = loaded.interrupt_handle()?;
114+
thread::spawn(move || {
115+
thread::sleep(Duration::from_millis(500));
116+
interrupt.kill();
117+
});
118+
// The outcome of this call is deliberately discarded; the assert below checks
// the resulting poisoned state instead of the specific error.
let _ = loaded.call_guest_function::<i32>("KeepCPUBusy", 100000i32);
119+
120+
assert!(loaded.is_poisoned()?);
121+
println!(" Sandbox poisoned again {}", loaded.is_poisoned()?);
122+
123+
// unload_module() will recover the sandbox
124+
// `unload_module` takes the loaded sandbox by value, so `loaded` is unusable after this line.
let wasm_sandbox = loaded.unload_module()?;
125+
println!(" Module unloaded (this calls restore internally)");
126+
127+
// Load a different module and continue
128+
let hello_path = get_wasm_module_path("HelloWorld.aot")?;
129+
let mut new_loaded = wasm_sandbox.load_module(hello_path)?;
130+
assert!(!new_loaded.is_poisoned()?);
131+
println!(
132+
" New module loaded, is_poisoned: {}",
133+
new_loaded.is_poisoned()?
134+
);
135+
136+
let result: i32 =
137+
new_loaded.call_guest_function("HelloWorld", "Recovery successful!".to_string())?;
138+
139+
println!(" HelloWorld returned: {}", result);
140+
141+
println!("\n=== Example Complete ===");
142+
Ok(())
143+
}

src/hyperlight_wasm/src/sandbox/loaded_wasm_sandbox.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ impl LoadedWasmSandbox {
5252
/// On success, return an `Ok` with the return
5353
/// value and a new copy of `Self` suitable for further use. On failure,
5454
/// return an appropriate `Err`.
55+
///
56+
/// # Errors
57+
///
58+
/// Returns `Err(HyperlightError::PoisonedSandbox)` if the sandbox is in a
59+
/// poisoned state. Use [`restore()`](Self::restore) to recover a poisoned
60+
/// sandbox before calling this method again.
61+
///
62+
/// Note: A sandbox becomes poisoned when a *previous* call fails due to
63+
/// abnormal guest execution. That call returns the original error (e.g.,
64+
/// `ExecutionCanceledByHost` from `interrupt_handle().kill()`, or errors
65+
/// from guest panics, memory violations, etc.), and the sandbox is marked
66+
/// as poisoned. This method then returns `PoisonedSandbox` on subsequent
67+
/// calls until the sandbox is recovered.
5568
pub fn call_guest_function<Output: SupportedReturnType>(
5669
&mut self,
5770
fn_name: &str,
@@ -64,6 +77,15 @@ impl LoadedWasmSandbox {
6477
}
6578

6679
/// Take a snapshot of the current state of the sandbox.
80+
///
81+
/// The snapshot can later be used with [`restore()`](Self::restore) to
82+
/// return the sandbox to this state.
83+
///
84+
/// # Errors
85+
///
86+
/// Returns `Err(HyperlightError::PoisonedSandbox)` if the sandbox is in a
87+
/// poisoned state. Use [`restore()`](Self::restore) with a previously
88+
/// taken snapshot to recover before taking a new snapshot.
6789
pub fn snapshot(&mut self) -> Result<Snapshot> {
6890
match &mut self.inner {
6991
Some(inner) => inner.snapshot(),
@@ -72,14 +94,29 @@ impl LoadedWasmSandbox {
7294
}
7395

7496
/// Restore the state of the sandbox to the state captured in the given snapshot.
97+
///
98+
/// This method clears the poisoned state if the sandbox was poisoned, making
99+
/// it usable again for guest function calls.
100+
///
101+
/// # Recovery from poisoned state
102+
///
103+
/// If a sandbox becomes poisoned (e.g., after `interrupt_handle().kill()`),
104+
/// calling `restore()` with a valid snapshot will:
105+
/// 1. Clear the poisoned state
106+
/// 2. Reset memory to the snapshot state
107+
/// 3. Allow subsequent [`call_guest_function()`](Self::call_guest_function) calls to succeed
75108
pub fn restore(&mut self, snapshot: &Snapshot) -> Result<()> {
76109
// `inner` is an `Option`; the restore is only possible while it holds a sandbox.
match &mut self.inner {
77110
// Delegate directly to the wrapped sandbox and surface its result unchanged.
Some(inner) => inner.restore(snapshot),
78111
// No wrapped sandbox is present. `log_then_return!` is defined outside this view;
// presumably it logs the message and returns it as an `Err` - confirm.
None => log_then_return!("No inner MultiUseSandbox to restore"),
79112
}
80113
}
81114

82-
/// unload the wasm module and return a `WasmSandbox` that can be used to load another module
115+
/// Unload the wasm module and return a `WasmSandbox` that can be used to load another module.
116+
///
117+
/// This method internally calls [`restore()`](Self::restore) to reset the sandbox to its
118+
/// pre-module state, which also clears any poisoned state. This means `unload_module()`
119+
/// can be called on a poisoned sandbox to recover it.
83120
pub fn unload_module(mut self) -> Result<WasmSandbox> {
84121
let sandbox = self
85122
.inner
@@ -119,6 +156,36 @@ impl LoadedWasmSandbox {
119156
))
120157
}
121158
}
159+
160+
/// Check if the sandbox is in a poisoned state.
161+
///
162+
/// A sandbox becomes poisoned when guest execution does not complete normally,
163+
/// such as after:
164+
/// - Forced termination via `interrupt_handle().kill()`
165+
/// - Guest panic or abort
166+
/// - Memory violation
167+
/// - Stack or heap exhaustion
168+
///
169+
/// Note: The call that causes poisoning returns the original error (e.g.,
170+
/// `ExecutionCanceledByHost`), not `PoisonedSandbox`. The sandbox is marked
171+
/// as poisoned after that error, and subsequent calls to
172+
/// [`call_guest_function()`](Self::call_guest_function) will return
173+
/// `Err(HyperlightError::PoisonedSandbox)`.
174+
///
175+
/// A poisoned sandbox cannot execute guest functions until recovered via
176+
/// [`restore()`](Self::restore). Calling [`unload_module()`](Self::unload_module)
177+
/// will also recover a poisoned sandbox since it performs a restore internally.
178+
///
179+
/// # Returns
180+
/// - `Ok(true)` if the sandbox is poisoned and needs recovery
181+
/// - `Ok(false)` if the sandbox is healthy and can execute guest functions
182+
/// - `Err` if the sandbox is in an invalid state
183+
pub fn is_poisoned(&self) -> Result<bool> {
184+
// Read-only query: only a shared borrow of `inner` is needed.
match &self.inner {
185+
// Report the flag exposed by the wrapped sandbox's `poisoned()` accessor.
Some(inner) => Ok(inner.poisoned()),
186+
// No wrapped sandbox is present. `log_then_return!` is defined outside this view;
// presumably it logs the message and returns it as an `Err` - confirm.
None => log_then_return!("No inner MultiUseSandbox to check poisoned state"),
187+
}
188+
}
122189
}
123190

124191
impl Callable for LoadedWasmSandbox {

0 commit comments

Comments
 (0)