Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions litebox/src/fs/in_mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,21 @@ impl<Platform: sync::RawSyncPrimitivesProvider> super::FileSystem for FileSystem
blksize: BLOCK_SIZE,
})
}

/// Returns the file's contents as a `'static` slice when they are stored as borrowed data.
///
/// Yields `None` when `fd` has no entry in the descriptor table, when it refers to a
/// directory, or when the file's data is owned rather than borrowed.
fn get_static_backing_data(&self, fd: &FileFd<Platform>) -> Option<&'static [u8]> {
    let table = self.litebox.descriptor_table();
    let descriptor = table.get_entry(fd)?;
    // Directories carry no byte contents at all, so bail out before touching any file lock.
    let file = match &descriptor.entry {
        Descriptor::Dir { .. } => return None,
        Descriptor::File { file, .. } => file,
    };
    let guard = file.read();
    match &guard.data {
        alloc::borrow::Cow::Owned(_) => None,
        alloc::borrow::Cow::Borrowed(bytes) => Some(*bytes),
    }
}
}

struct RootDir<Platform: sync::RawSyncPrimitivesProvider> {
Expand Down
15 changes: 15 additions & 0 deletions litebox/src/fs/layered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,21 @@ impl<
blksize,
})
}

/// Forwards the static-backing-data query to whichever layer currently owns `fd`.
///
/// Returns `None` if `fd` has no entry in the descriptor table; otherwise returns whatever
/// the owning layer (upper or lower) reports for its own descriptor.
fn get_static_backing_data(
    &self,
    fd: &FileFd<Platform, Upper, Lower>,
) -> Option<&'static [u8]> {
    let table = self.litebox.descriptor_table();
    // Clone the shared entry out of the table so the layer call below runs on our own handle.
    let shared = table.with_entry(fd, |descriptor| Arc::clone(&descriptor.entry.entry))?;
    match &*shared {
        EntryX::Upper { fd: upper_fd } => self.upper.get_static_backing_data(upper_fd),
        EntryX::Lower { fd: lower_fd } => self.lower.get_static_backing_data(lower_fd),
        EntryX::Tombstone => unreachable!(),
    }
}
}

struct Descriptor<Upper: super::FileSystem + 'static, Lower: super::FileSystem + 'static> {
Expand Down
11 changes: 11 additions & 0 deletions litebox/src/fs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ pub trait FileSystem: private::Sealed + FdEnabledSubsystem {

/// Equivalent to [`Self::file_status`], but operates on an open `fd` instead.
fn fd_file_status(&self, fd: &TypedFd<Self>) -> Result<FileStatus, FileStatusError>;

/// Get static backing data for a file, if available and supported.
///
/// This method returns the (entire) underlying static byte slice if the file's contents are
/// backed by borrowed static data (e.g., loaded via `initialize_primarily_read_heavy_file`).
///
/// Returns `None` to indicate that no static backing data is available, or that the file
/// system does not support this query. The default implementation always returns `None`;
/// file systems that can expose static data override it.
#[expect(unused_variables, reason = "default body, non-underscored param names")]
fn get_static_backing_data(&self, fd: &TypedFd<Self>) -> Option<&'static [u8]> {
None
}
}

bitflags! {
Expand Down
13 changes: 13 additions & 0 deletions litebox/src/mm/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,19 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
self.vmas.iter()
}

/// Start tracking `range` as `vma` for memory that is already mapped (e.g., via CoW),
/// bypassing the platform allocator entirely.
///
/// Previously tracked mappings that overlap `range` are silently dropped from tracking, with
/// no call to the platform deallocator. Callers that need to detect such overlap should
/// query [`Self::overlapping`] before calling this.
pub(super) fn register_existing_mapping_overwrite(
    &mut self,
    range: PageRange<ALIGN>,
    vma: VmArea,
) {
    let tracked_range = range.into();
    self.vmas.insert(tracked_range, vma);
}

/// Gets an iterator over all the stored ranges that are
/// either partially or completely overlapped by the given range.
pub(super) fn overlapping(
Expand Down
37 changes: 35 additions & 2 deletions litebox/src/mm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use core::ops::Range;

use alloc::vec::Vec;
use linux::{
CreatePagesFlags, MappingError, PageFaultError, PageRange, VmFlags, Vmem, VmemPageFaultHandler,
VmemProtectError, VmemUnmapError,
CreatePagesFlags, MappingError, PageFaultError, PageRange, VmArea, VmFlags, Vmem,
VmemPageFaultHandler, VmemProtectError, VmemUnmapError,
};

use crate::{
Expand Down Expand Up @@ -591,6 +591,39 @@ where
)
}

/// Record an already-mapped memory region in the VMA tracker.
///
/// Intended for memory that was mapped outside the usual `create_*_pages` path (e.g., CoW
/// mappings created directly by the platform), so that later `mprotect`, `munmap`, etc. see
/// the region as tracked.
///
/// With `replace == true`, tracked mappings overlapping `range` are evicted from the tracker
/// (the platform deallocator is not called) and the new region takes their place. With
/// `replace == false`, an overlap with any tracked mapping leaves the tracker untouched and
/// the call returns `None`.
///
/// Returns `Some(())` once the region has been registered.
///
/// # Safety
///
/// The `range` must be an already-mapped region with the given `permissions`.
#[must_use]
pub unsafe fn register_existing_mapping(
    &self,
    range: PageRange<ALIGN>,
    permissions: MemoryRegionPermissions,
    is_file_backed: bool,
    replace: bool,
) -> Option<()> {
    let flags = VmFlags::from(permissions) | VmFlags::VM_MAY_ACCESS_FLAGS;
    let area = VmArea::new(flags, is_file_backed);
    let mut tracker = self.vmem.write();
    // The overlap query is only evaluated when the caller did not ask for replacement.
    let may_insert = replace || tracker.overlapping(range.into()).next().is_none();
    may_insert.then(|| tracker.register_existing_mapping_overwrite(range, area))
}

/// Returns all mappings in a vector.
pub fn mappings(&self) -> Vec<(Range<usize>, VmFlags)> {
self.vmem
Expand Down
45 changes: 45 additions & 0 deletions litebox/src/platform/page_mgmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,25 @@ pub trait PageManagementProvider<const ALIGN: usize>: RawPointerProvider {
///
/// Note that the returned ranges should be `ALIGN`-aligned.
fn reserved_pages(&self) -> impl Iterator<Item = &Range<usize>>;

/// Attempt to allocate pages with copy-on-write semantics backed by static data.
///
/// This method allows platforms that support it to create CoW mappings instead of performing
/// expensive page-by-page memory copies. This is particularly useful when mapping pre-loaded
/// file data that was mmap'd by the host.
///
/// The default implementation does not support CoW and always returns
/// [`CowAllocationError::UnsupportedByPlatform`]. Platforms that DO support CoW should
/// override this method to unlock better performance.
///
/// # Errors
///
/// Returns a [`CowAllocationError`] describing why no CoW mapping was created.
#[expect(unused_variables, reason = "default body, non-underscored param names")]
fn try_allocate_cow_pages(
&self,
suggested_start: usize,
source_data: &'static [u8],
permissions: MemoryRegionPermissions,
fixed_address_behavior: FixedAddressBehavior,
) -> Result<Self::RawMutPointer<u8>, CowAllocationError> {
Err(CowAllocationError::UnsupportedByPlatform)
}
}

/// Behavior when allocating pages at a fixed address.
Expand Down Expand Up @@ -243,3 +262,29 @@ pub enum PermissionUpdateError {
#[error("provided range contains unallocated pages")]
Unallocated,
}

/// Possible errors for [`PageManagementProvider::try_allocate_cow_pages`]
///
/// ```text
/// ____________________
/// ( Maybe the grass is )
/// ( greener on the )
/// ( other side? )
/// --------------------
/// o ^__^
/// o (oo)\_______
/// (__)\ )\/\
/// ||----w |
/// || ||
/// ```
#[derive(Error, Debug)]
pub enum CowAllocationError {
/// The platform has no CoW support at all; this is what the default
/// `try_allocate_cow_pages` implementation returns.
#[error("copy-on-write page allocation is not supported for this particular platform")]
UnsupportedByPlatform,
/// The platform supports CoW, but the supplied source data is not something it can map
/// copy-on-write.
#[error("source region is not copy-on-writable")]
UnsupportedSourceRegion,
/// The request does not satisfy the platform's alignment requirements.
#[error("unaligned request")]
Unaligned,
/// The platform attempted to create the CoW mapping but an underlying operation failed.
#[error("internal failure in creating CoW pages")]
InternalFailure,
}
143 changes: 142 additions & 1 deletion litebox_platform_linux_userland/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@

use std::cell::Cell;
use std::os::fd::{AsRawFd as _, FromRawFd as _};
use std::path::PathBuf;
use std::sync::atomic::{AtomicI32, AtomicU32, Ordering};
use std::time::Duration;

use litebox::fs::OFlags;
use litebox::platform::UnblockedOrTimedOut;
use litebox::platform::page_mgmt::{FixedAddressBehavior, MemoryRegionPermissions};
use litebox::platform::page_mgmt::{
CowAllocationError, FixedAddressBehavior, MemoryRegionPermissions,
};
use litebox::platform::{ImmediatelyWokenUp, RawConstPointer as _};
use litebox::shim::ContinueOperation;
use litebox::utils::{ReinterpretSignedExt, ReinterpretUnsignedExt as _, TruncateExt};
Expand All @@ -40,6 +43,9 @@ pub struct LinuxUserland {
reserved_pages: Vec<core::ops::Range<usize>>,
/// The base address of the VDSO.
vdso_address: Option<usize>,
/// CoW-eligible memory regions. Maps start address of the static slice, to the info needed to
/// re-mmap the file.
cow_regions: std::sync::RwLock<std::collections::BTreeMap<usize, CowRegionInfo>>,
}

impl core::fmt::Debug for LinuxUserland {
Expand All @@ -48,6 +54,15 @@ impl core::fmt::Debug for LinuxUserland {
}
}

/// Information about a CoW-eligible memory region backed by a file.
///
/// Entries are stored in `LinuxUserland::cow_regions`, keyed by the start address of the
/// registered static slice.
#[derive(Debug, Clone)]
struct CowRegionInfo {
/// The path to the backing file on the host filesystem.
file_path: PathBuf,
/// Length in bytes of the registered region. `register_cow_region` sets this to the length
/// of the static slice it is given, which is presumably the full length of the backing file.
file_length: usize,
}

// NOTE(review): presumably mirrors the Linux `IF_NAMESIZE` constant (size of a network
// interface name buffer); usage is outside this view, so confirm.
const IF_NAMESIZE: usize = 16;
/// Use TUN device
// NOTE(review): value matches `IFF_TUN` from the Linux TUN/TAP headers; confirm at the use site.
const IFF_TUN: i32 = 0x0001;
Expand Down Expand Up @@ -163,10 +178,53 @@ impl LinuxUserland {
seccomp_interception_enabled: std::sync::atomic::AtomicBool::new(false),
reserved_pages,
vdso_address,
cow_regions: std::sync::RwLock::new(std::collections::BTreeMap::new()),
};
Box::leak(Box::new(platform))
}

/// Register a CoW-eligible memory region backed by a file.
///
/// `data` is the static slice holding the file's contents and `file_path` is the host path
/// the contents came from. The region is recorded under the start address of `data`, with
/// `data.len()` as its length, so that later lookups can translate a sub-slice of `data`
/// back into `(file_path, offset)`.
///
/// # Panics
///
/// Panics if an overlapping region is already registered. This includes both regions that
/// start inside `data`'s address range and regions that start before it but extend into it.
/// Also panics if the region lock is poisoned.
pub fn register_cow_region(&self, data: &'static [u8], file_path: impl Into<PathBuf>) {
    let start = data.as_ptr() as usize;
    let end = start + data.len();
    let info = CowRegionInfo {
        file_path: file_path.into(),
        file_length: data.len(),
    };

    let mut regions = self.cow_regions.write().unwrap();
    // Any region starting at or after `start` overlaps exactly when it starts before `end`.
    assert!(
        regions.range(start..end).next().is_none(),
        "Attempting to register an overlapping region"
    );
    // A region starting before `start` overlaps when it extends past `start`. Only the
    // closest predecessor needs checking because registered regions are kept disjoint.
    if let Some((&prev_start, prev)) = regions.range(..start).next_back() {
        assert!(
            prev_start + prev.file_length <= start,
            "Attempting to register an overlapping region"
        );
    }
    let old = regions.insert(start, info);
    assert!(old.is_none());
}

/// Find the registered CoW region that fully contains `source_data`.
///
/// On success, returns `Some((file_path, offset_in_file))`, where `offset_in_file` is the
/// distance from the start of the registered region to the start of `source_data`. Returns
/// `None` when no registered region starts at or below the slice, or when the nearest such
/// region does not cover the whole slice.
///
/// # Panics
///
/// Panics if the region lock is poisoned or if an end address overflows `usize`.
fn lookup_cow_region(&self, source_data: &'static [u8]) -> Option<(PathBuf, usize)> {
    let wanted_start = source_data.as_ptr() as usize;
    let table = self.cow_regions.read().unwrap();

    // The only candidate is the region with the greatest start address <= the slice start.
    let (&base, region) = table.range(..=wanted_start).next_back()?;
    let region_end = base.checked_add(region.file_length).unwrap();
    let wanted_end = wanted_start.checked_add(source_data.len()).unwrap();

    let contained = base <= wanted_start && wanted_end <= region_end;
    contained.then(|| (region.file_path.clone(), wanted_start - base))
}

/// Enable seccomp syscall interception on the platform.
///
/// # Panics
Expand Down Expand Up @@ -1490,6 +1548,89 @@ impl<const ALIGN: usize> litebox::platform::PageManagementProvider<ALIGN> for Li
// Iterates over the ranges stored in this platform's `reserved_pages` field.
fn reserved_pages(&self) -> impl Iterator<Item = &core::ops::Range<usize>> {
self.reserved_pages.iter()
}

/// Create a private (copy-on-write) file mapping for `source_data`.
///
/// `source_data` must lie inside a region previously registered with
/// `register_cow_region`. The backing file is opened read-only, `source_data.len()` bytes
/// starting at the slice's offset within the file are mapped with `MAP_PRIVATE` at
/// `suggested_start` (subject to `fixed_address_behavior`), and the file descriptor is
/// closed again before returning.
///
/// # Errors
///
/// - [`CowAllocationError::UnsupportedSourceRegion`] if `source_data` is not inside a
///   registered CoW region.
/// - [`CowAllocationError::Unaligned`] if the slice's offset within the file is not a
///   multiple of `ALIGN`.
/// - [`CowAllocationError::InternalFailure`] if the path cannot be converted to a C string,
///   or if the `open` or `mmap` syscall fails.
fn try_allocate_cow_pages(
    &self,
    suggested_start: usize,
    source_data: &'static [u8],
    permissions: MemoryRegionPermissions,
    fixed_address_behavior: FixedAddressBehavior,
) -> Result<Self::RawMutPointer<u8>, CowAllocationError> {
    let Some((file_path, file_offset)) = self.lookup_cow_region(source_data) else {
        return Err(CowAllocationError::UnsupportedSourceRegion);
    };
    if !file_offset.is_multiple_of(ALIGN) {
        return Err(CowAllocationError::Unaligned);
    }

    // A path containing an interior NUL cannot be passed to `open`; report it as a failure
    // so the caller can take its non-CoW path instead of panicking here.
    let file_path_cstr = std::ffi::CString::new(file_path.as_os_str().as_encoded_bytes())
        .map_err(|_| CowAllocationError::InternalFailure)?;
    // TODO(jb): We should likely be storing pre-opened FDs, right?
    //
    // Opening can fail for reasons unrelated to any bug here (descriptor exhaustion, the
    // file having been moved or removed on the host, ...), so a failure is surfaced as an
    // error rather than a panic.
    //
    // SAFETY: `file_path_cstr` is a NUL-terminated buffer that stays alive for the whole
    // call; the remaining arguments are plain integers.
    let fd = unsafe {
        syscalls::syscall4(
            syscalls::Sysno::open,
            file_path_cstr.as_ptr() as usize,
            OFlags::RDONLY.bits() as usize,
            0,
            // Unused by the syscall but would be checked by Seccomp filter if enabled.
            syscall_intercept::SYSCALL_ARG_MAGIC,
        )
    }
    .map_err(|_| CowAllocationError::InternalFailure)?;

    let mut flags = MapFlags::MAP_PRIVATE;
    match fixed_address_behavior {
        FixedAddressBehavior::Hint => {}
        FixedAddressBehavior::Replace => flags |= MapFlags::MAP_FIXED,
        FixedAddressBehavior::NoReplace => flags |= MapFlags::MAP_FIXED_NOREPLACE,
    }

    // NOTE(review): with `MAP_FIXED` the kernel replaces whatever is currently mapped at
    // `suggested_start`; this relies on the caller having requested `Replace` deliberately.
    let result = unsafe {
        syscalls::syscall6(
            {
                #[cfg(target_arch = "x86_64")]
                {
                    syscalls::Sysno::mmap
                }
                #[cfg(target_arch = "x86")]
                {
                    syscalls::Sysno::mmap2
                }
            },
            suggested_start,
            source_data.len(),
            prot_flags(permissions).bits().reinterpret_as_unsigned() as usize,
            (flags.bits().reinterpret_as_unsigned()
                // This is to ensure it won't be intercepted by Seccomp if enabled.
                | syscall_intercept::MMAP_FLAG_MAGIC) as usize,
            fd,
            {
                #[cfg(target_arch = "x86_64")]
                {
                    file_offset
                }
                #[cfg(target_arch = "x86")]
                {
                    // mmap2 takes offset in pages, not bytes
                    file_offset / ALIGN
                }
            },
        )
    };

    // The descriptor is closed whether or not the mapping succeeded; an established
    // mapping does not depend on it staying open. A failure to close is deliberately
    // ignored.
    //
    // SAFETY: `fd` was returned by the `open` above and is not used after this point.
    let _ = unsafe {
        syscalls::syscall2(
            syscalls::Sysno::close,
            fd,
            // This is to ensure it won't be intercepted by Seccomp if enabled.
            syscall_intercept::SYSCALL_ARG_MAGIC,
        )
    };

    match result {
        Ok(ptr) => Ok(UserMutPtr::from_usize(ptr)),
        Err(_) => Err(CowAllocationError::InternalFailure),
    }
}
}

impl litebox::platform::StdioProvider for LinuxUserland {
Expand Down
Loading