Skip to content

Commit d74a279

Browse files
authored
Prevent openat from trapping on seccomp thread, by making it return EACCES instead (hyperlight-dev#573)
* Prevent openat from trapping on seccomp thread, by making it return EACCES instead Signed-off-by: Ludvig Liljenberg <[email protected]> * Allow openat to be allowed if specified in extra_allowed_syscalls Signed-off-by: Ludvig Liljenberg <[email protected]> --------- Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent 1822d9a commit d74a279

File tree

4 files changed

+164
-10
lines changed

4 files changed

+164
-10
lines changed

src/hyperlight_host/src/sandbox/host_funcs.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,9 @@ fn maybe_with_seccomp<T: Send>(
162162
.name(format!("Host Function Worker Thread for: {name:?}"))
163163
.spawn(move |_| {
164164
let seccomp_filter = get_seccomp_filter_for_host_function_worker_thread(syscalls)?;
165-
seccompiler::apply_filter(&seccomp_filter)?;
165+
seccomp_filter
166+
.iter()
167+
.try_for_each(|filter| seccompiler::apply_filter(filter))?;
166168

167169
// We have a `catch_unwind` here because, if a disallowed syscall is issued,
168170
// we handle it by panicking. This is to avoid returning execution to the

src/hyperlight_host/src/sandbox/initialized_multi_use.rs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,92 @@ mod tests {
458458
Ok(())
459459
}
460460

461+
// We have a secomp specifically for `openat`, but we don't want to crash on `openat`, but rather make sure `openat` returns `EACCES`
462+
#[test]
463+
#[cfg(target_os = "linux")]
464+
fn violate_seccomp_filters_openat() -> Result<()> {
465+
// Hostcall to call `openat`.
466+
fn make_openat_syscall() -> Result<i64> {
467+
use std::ffi::CString;
468+
469+
let path = CString::new("/proc/sys/vm/overcommit_memory").unwrap();
470+
471+
let fd_or_err = unsafe {
472+
libc::syscall(
473+
libc::SYS_openat,
474+
libc::AT_FDCWD,
475+
path.as_ptr(),
476+
libc::O_RDONLY,
477+
)
478+
};
479+
480+
if fd_or_err == -1 {
481+
Ok((-std::io::Error::last_os_error().raw_os_error().unwrap()).into())
482+
} else {
483+
Ok(fd_or_err)
484+
}
485+
}
486+
{
487+
// First make sure a regular call to `openat` on /proc/sys/vm/overcommit_memory succeeds
488+
let ret = make_openat_syscall()?;
489+
assert!(
490+
ret >= 0,
491+
"Expected openat syscall to succeed, got: {:?}",
492+
ret
493+
);
494+
495+
let mut ubox = UninitializedSandbox::new(
496+
GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")),
497+
None,
498+
)
499+
.unwrap();
500+
ubox.register("Openat_Hostfunc", make_openat_syscall)?;
501+
502+
let mut sbox = ubox.evolve(Noop::default()).unwrap();
503+
let host_func_result = sbox
504+
.call_guest_function_by_name::<i64>(
505+
"CallGivenParamlessHostFuncThatReturnsI64",
506+
"Openat_Hostfunc".to_string(),
507+
)
508+
.expect("Expected to call host function that returns i64");
509+
510+
if cfg!(feature = "seccomp") {
511+
// If seccomp is enabled, we expect the syscall to return EACCES, as setup by our seccomp filter
512+
assert_eq!(host_func_result, -libc::EACCES as i64);
513+
} else {
514+
// If seccomp is not enabled, we expect the syscall to succeed
515+
assert!(host_func_result >= 0);
516+
}
517+
}
518+
519+
#[cfg(feature = "seccomp")]
520+
{
521+
// Now let's make sure if we register the `openat` syscall as an extra allowed syscall, it will succeed
522+
let mut ubox = UninitializedSandbox::new(
523+
GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")),
524+
None,
525+
)
526+
.unwrap();
527+
ubox.register_with_extra_allowed_syscalls(
528+
"Openat_Hostfunc",
529+
make_openat_syscall,
530+
[libc::SYS_openat],
531+
)?;
532+
let mut sbox = ubox.evolve(Noop::default()).unwrap();
533+
let host_func_result = sbox
534+
.call_guest_function_by_name::<i64>(
535+
"CallGivenParamlessHostFuncThatReturnsI64",
536+
"Openat_Hostfunc".to_string(),
537+
)
538+
.expect("Expected to call host function that returns i64");
539+
540+
// should pass regardless of seccomp feature
541+
assert!(host_func_result >= 0);
542+
}
543+
544+
Ok(())
545+
}
546+
461547
#[test]
462548
fn test_trigger_exception_on_guest() {
463549
let usbox = UninitializedSandbox::new(

src/hyperlight_host/src/seccomp/guest.rs

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License.
1717
use seccompiler::SeccompCmpOp::Eq;
1818
use seccompiler::{
1919
BpfProgram, SeccompAction, SeccompCmpArgLen as ArgLen, SeccompCondition as Cond, SeccompFilter,
20-
SeccompRule,
20+
SeccompRule, TargetArch,
2121
};
2222

2323
use crate::sandbox::ExtraAllowedSyscall;
@@ -59,10 +59,14 @@ fn syscalls_allowlist() -> Result<Vec<(i64, Vec<SeccompRule>)>> {
5959
(libc::SYS_sched_yield, vec![]),
6060
// `mprotect` is needed by malloc during memory allocation
6161
(libc::SYS_mprotect, vec![]),
62+
// `openat` is marked allowed here because it may be called by `libc::free()`
63+
// since it will try to open /proc/sys/vm/overcommit_memory (https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/malloc-sysdep.h;h=778d8971d53e284397c3a5dcdd923e93be5e4731;hb=HEAD)
64+
// We have another more restrictive filter for it below so it will return EACCES instead of trap, in which case libc will use the default value
65+
(libc::SYS_openat, vec![]),
6266
])
6367
}
6468

65-
/// Creates a `BpfProgram` for a `SeccompFilter` over specific syscalls/`SeccompRule`s
69+
/// Creates two `BpfProgram`s for a `SeccompFilter` over specific syscalls/`SeccompRule`s
6670
/// intended to be applied on host function threads.
6771
///
6872
/// Note: This does not provide coverage over the Hyperlight host, which is why we don't need
@@ -71,7 +75,7 @@ fn syscalls_allowlist() -> Result<Vec<(i64, Vec<SeccompRule>)>> {
7175
/// or `KVM_CREATE_VCPU`).
7276
pub(crate) fn get_seccomp_filter_for_host_function_worker_thread(
7377
extra_allowed_syscalls: Option<&[ExtraAllowedSyscall]>,
74-
) -> Result<BpfProgram> {
78+
) -> Result<Vec<BpfProgram>> {
7579
let mut allowed_syscalls = syscalls_allowlist()?;
7680

7781
if let Some(extra_allowed_syscalls) = extra_allowed_syscalls {
@@ -87,11 +91,50 @@ pub(crate) fn get_seccomp_filter_for_host_function_worker_thread(
8791
allowed_syscalls.dedup();
8892
}
8993

90-
Ok(SeccompFilter::new(
94+
let arch: TargetArch = std::env::consts::ARCH.try_into()?;
95+
96+
// Allowlist filter that traps on unknown syscalls
97+
let allowlist = SeccompFilter::new(
9198
allowed_syscalls.into_iter().collect(),
92-
SeccompAction::Trap, // non-match syscall will kill the offending thread
93-
SeccompAction::Allow, // match syscall will be allowed
94-
std::env::consts::ARCH.try_into()?,
95-
)
96-
.and_then(|filter| filter.try_into())?)
99+
SeccompAction::Trap,
100+
SeccompAction::Allow,
101+
arch,
102+
)?
103+
.try_into()?;
104+
105+
// If `openat` is an exclicitly allowed syscall, we shouldn't return the filter that forces it to return EACCES.
106+
if let Some(extra_syscalls) = extra_allowed_syscalls {
107+
if extra_syscalls.contains(&libc::SYS_openat) {
108+
return Ok(vec![allowlist]);
109+
}
110+
}
111+
// Otherwise, we return both filters.
112+
113+
// Filter that forces `openat` to return EACCES
114+
let errno_on_openat = SeccompFilter::new(
115+
[(libc::SYS_openat, vec![])].into_iter().collect(),
116+
SeccompAction::Allow,
117+
SeccompAction::Errno(libc::EACCES.try_into()?),
118+
arch,
119+
)?
120+
.try_into()?;
121+
122+
// Note: the order of the 2 filters are important. If we applied the strict filter first,
123+
// we wouldn't be allowed to setup the second filter (would be trapped since the syscalls to setup seccomp are not allowed).
124+
// However, from an seccomp filter perspective, the order of the filters is not important:
125+
//
126+
// If multiple filters exist, they are all executed, in reverse order
127+
// of their addition to the filter tree—that is, the most recently
128+
// installed filter is executed first. (Note that all filters will
129+
// be called even if one of the earlier filters returns
130+
// SECCOMP_RET_KILL. This is done to simplify the kernel code and to
131+
// provide a tiny speed-up in the execution of sets of filters by
132+
// avoiding a check for this uncommon case.) The return value for
133+
// the evaluation of a given system call is the first-seen action
134+
// value of highest precedence (along with its accompanying data)
135+
// returned by execution of all of the filters.
136+
//
137+
// (https://man7.org/linux/man-pages/man2/seccomp.2.html).
138+
//
139+
Ok(vec![errno_on_openat, allowlist])
97140
}

src/tests/rust_guests/simpleguest/src/main.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,21 @@ fn violate_seccomp_filters(function_call: &FunctionCall) -> Result<Vec<u8>> {
700700
}
701701
}
702702

703+
fn call_given_paramless_hostfunc_that_returns_i64(function_call: &FunctionCall) -> Result<Vec<u8>> {
704+
if let ParameterValue::String(hostfuncname) =
705+
function_call.parameters.clone().unwrap()[0].clone()
706+
{
707+
let res = call_host_function::<i64>(&hostfuncname, None, ReturnType::Long)?;
708+
709+
Ok(get_flatbuffer_result(res))
710+
} else {
711+
Err(HyperlightGuestError::new(
712+
ErrorCode::GuestFunctionParameterTypeMismatch,
713+
"Invalid parameters passed to test_rust_malloc".to_string(),
714+
))
715+
}
716+
}
717+
703718
fn add(function_call: &FunctionCall) -> Result<Vec<u8>> {
704719
if let (ParameterValue::Int(a), ParameterValue::Int(b)) = (
705720
function_call.parameters.clone().unwrap()[0].clone(),
@@ -1137,6 +1152,14 @@ pub extern "C" fn hyperlight_main() {
11371152
large_parameters as usize,
11381153
);
11391154
register_function(large_parameters_def);
1155+
1156+
let call_given_hostfunc_def = GuestFunctionDefinition::new(
1157+
"CallGivenParamlessHostFuncThatReturnsI64".to_string(),
1158+
Vec::from(&[ParameterType::String]),
1159+
ReturnType::Long,
1160+
call_given_paramless_hostfunc_that_returns_i64 as usize,
1161+
);
1162+
register_function(call_given_hostfunc_def);
11401163
}
11411164

11421165
#[no_mangle]

0 commit comments

Comments
 (0)