Skip to content

Commit d03cf6a

Browse files
Opportunistic CoW for platforms that support it
1 parent 88d4df5 commit d03cf6a

File tree

9 files changed

+433
-20
lines changed

9 files changed

+433
-20
lines changed

litebox/src/fs/in_mem.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,21 @@ impl<Platform: sync::RawSyncPrimitivesProvider> super::FileSystem for FileSystem
785785
blksize: BLOCK_SIZE,
786786
})
787787
}
788+
789+
fn get_static_backing_data(&self, fd: &FileFd<Platform>) -> Option<&'static [u8]> {
790+
let descriptor_table = self.litebox.descriptor_table();
791+
let entry = descriptor_table.get_entry(fd)?;
792+
match &entry.entry {
793+
Descriptor::File { file, .. } => {
794+
let file = file.read();
795+
match &file.data {
796+
alloc::borrow::Cow::Borrowed(slice) => Some(*slice),
797+
alloc::borrow::Cow::Owned(_) => None,
798+
}
799+
}
800+
Descriptor::Dir { .. } => None,
801+
}
802+
}
788803
}
789804

790805
struct RootDir<Platform: sync::RawSyncPrimitivesProvider> {

litebox/src/fs/layered.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,21 @@ impl<
13361336
blksize,
13371337
})
13381338
}
1339+
1340+
fn get_static_backing_data(
1341+
&self,
1342+
fd: &FileFd<Platform, Upper, Lower>,
1343+
) -> Option<&'static [u8]> {
1344+
let entry = self
1345+
.litebox
1346+
.descriptor_table()
1347+
.with_entry(fd, |descriptor| Arc::clone(&descriptor.entry.entry))?;
1348+
match entry.as_ref() {
1349+
EntryX::Upper { fd } => self.upper.get_static_backing_data(fd),
1350+
EntryX::Lower { fd } => self.lower.get_static_backing_data(fd),
1351+
EntryX::Tombstone => unreachable!(),
1352+
}
1353+
}
13391354
}
13401355

13411356
struct Descriptor<Upper: super::FileSystem + 'static, Lower: super::FileSystem + 'static> {

litebox/src/fs/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,17 @@ pub trait FileSystem: private::Sealed + FdEnabledSubsystem {
136136

137137
/// Equivalent to [`Self::file_status`], but on an open `fd` instead.
138138
fn fd_file_status(&self, fd: &TypedFd<Self>) -> Result<FileStatus, FileStatusError>;
139+
140+
/// Get static backing data for a file, if available and supported.
141+
///
142+
/// This method returns the (entire) underlying static byte slice if the file's contents are
143+
/// backed by borrowed static data (e.g., loaded via `initialize_primarily_read_heavy_file`).
144+
///
145+
/// Returns `None` to indicate that no static backing data is available/supported. The default
/// implementation always returns `None`.
146+
#[expect(unused_variables, reason = "default body, non-underscored param names")]
147+
fn get_static_backing_data(&self, fd: &TypedFd<Self>) -> Option<&'static [u8]> {
148+
None
149+
}
139150
}
140151

141152
bitflags! {

litebox/src/mm/linux.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,19 @@ impl<Platform: PageManagementProvider<ALIGN> + 'static, const ALIGN: usize> Vmem
315315
self.vmas.iter()
316316
}
317317

318+
/// Insert an already-allocated region (e.g., via CoW) without calling the platform allocator.
319+
///
320+
/// Any existing tracked mappings that overlap `range` are silently removed from tracking
321+
/// (without calling the platform deallocator) before inserting. Use [`Self::overlapping`] to
322+
/// check for overlap before running this if needed.
323+
pub(super) fn register_existing_mapping_overwrite(
324+
&mut self,
325+
range: PageRange<ALIGN>,
326+
vma: VmArea,
327+
) {
328+
// NOTE(review): the "overlapping mappings are removed" contract above relies entirely on
// the semantics of `vmas.insert` — confirm against the container's documentation.
self.vmas.insert(range.into(), vma);
329+
}
330+
318331
/// Gets an iterator over all the stored ranges that are
319332
/// either partially or completely overlapped by the given range.
320333
pub(super) fn overlapping(

litebox/src/mm/mod.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ use core::ops::Range;
1414

1515
use alloc::vec::Vec;
1616
use linux::{
17-
CreatePagesFlags, MappingError, PageFaultError, PageRange, VmFlags, Vmem, VmemPageFaultHandler,
18-
VmemProtectError, VmemUnmapError,
17+
CreatePagesFlags, MappingError, PageFaultError, PageRange, VmArea, VmFlags, Vmem,
18+
VmemPageFaultHandler, VmemProtectError, VmemUnmapError,
1919
};
2020

2121
use crate::{
@@ -591,6 +591,39 @@ where
591591
)
592592
}
593593

594+
/// Register an already-allocated memory region in the VMA tracker.
595+
///
596+
/// This is used when memory has been allocated by some means other than the normal
597+
/// `create_*_pages` path (e.g., CoW mappings created directly by the platform), so that the
598+
/// page manager tracks the region for future `mprotect`, `munmap`, etc.
599+
///
600+
/// If `replace` is `true`, any overlapping tracked mappings are evicted from the tracker
601+
/// (without calling the platform deallocator) before inserting. Otherwise, returns `None`
602+
/// without registering if the provided `range` overlaps with any existing mapping.
603+
///
604+
/// # Safety
605+
///
606+
/// The `range` must be an already-mapped region with the given `permissions`.
607+
#[must_use]
608+
pub unsafe fn register_existing_mapping(
609+
&self,
610+
range: PageRange<ALIGN>,
611+
permissions: MemoryRegionPermissions,
612+
is_file_backed: bool,
613+
replace: bool,
614+
) -> Option<()> {
615+
let vma = VmArea::new(
616+
VmFlags::from(permissions) | VmFlags::VM_MAY_ACCESS_FLAGS,
617+
is_file_backed,
618+
);
619+
let mut vmem = self.vmem.write();
620+
if !replace && vmem.overlapping(range.into()).next().is_some() {
621+
return None;
622+
}
623+
vmem.register_existing_mapping_overwrite(range, vma);
624+
Some(())
625+
}
626+
594627
/// Returns all mappings in a vector.
595628
pub fn mappings(&self) -> Vec<(Range<usize>, VmFlags)> {
596629
self.vmem

litebox/src/platform/page_mgmt.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,25 @@ pub trait PageManagementProvider<const ALIGN: usize>: RawPointerProvider {
172172
///
173173
/// Note that the returned ranges should be `ALIGN`-aligned.
174174
fn reserved_pages(&self) -> impl Iterator<Item = &Range<usize>>;
175+
176+
/// Attempt to allocate pages with copy-on-write semantics backed by static data.
177+
///
178+
/// This method allows platforms that support it to create CoW mappings instead of performing
179+
/// expensive page-by-page memory copies. This is particularly useful when mapping pre-loaded
180+
/// file data that was mmap'd by the host.
181+
///
182+
/// The default implementation always fails with
/// [`CowAllocationError::UnsupportedByPlatform`]. Platforms that DO support CoW should
183+
/// override this method to unlock better performance.
184+
#[expect(unused_variables, reason = "default body, non-underscored param names")]
185+
fn try_allocate_cow_pages(
186+
&self,
187+
suggested_start: usize,
188+
source_data: &'static [u8],
189+
permissions: MemoryRegionPermissions,
190+
fixed_address_behavior: FixedAddressBehavior,
191+
) -> Result<Self::RawMutPointer<u8>, CowAllocationError> {
192+
Err(CowAllocationError::UnsupportedByPlatform)
193+
}
175194
}
176195

177196
/// Behavior when allocating pages at a fixed address.
@@ -243,3 +262,29 @@ pub enum PermissionUpdateError {
243262
#[error("provided range contains unallocated pages")]
244263
Unallocated,
245264
}
265+
266+
/// Possible errors for [`PageManagementProvider::try_allocate_cow_pages`].
267+
///
268+
/// ```text
269+
/// ____________________
270+
/// ( Maybe the grass is )
271+
/// ( greener on the )
272+
/// ( other side? )
273+
/// --------------------
274+
/// o ^__^
275+
/// o (oo)\_______
276+
/// (__)\ )\/\
277+
/// ||----w |
278+
/// || ||
279+
/// ```
280+
#[derive(Error, Debug)]
281+
pub enum CowAllocationError {
282+
/// The platform does not implement CoW allocation; this is what the default
/// `try_allocate_cow_pages` body returns.
#[error("copy-on-write page allocation is not supported for this particular platform")]
283+
UnsupportedByPlatform,
284+
/// The provided source data is not something the platform can map copy-on-write.
#[error("source region is not copy-on-writable")]
285+
UnsupportedSourceRegion,
286+
/// The request does not meet the platform's alignment requirements.
#[error("unaligned request")]
287+
Unaligned,
288+
/// The platform attempted to create the CoW mapping but the attempt failed.
#[error("internal failure in creating CoW pages")]
289+
InternalFailure,
290+
}

litebox_platform_linux_userland/src/lib.rs

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99

1010
use std::cell::Cell;
1111
use std::os::fd::{AsRawFd as _, FromRawFd as _};
12+
use std::path::PathBuf;
1213
use std::sync::atomic::{AtomicI32, AtomicU32, Ordering};
1314
use std::time::Duration;
1415

1516
use litebox::fs::OFlags;
1617
use litebox::platform::UnblockedOrTimedOut;
17-
use litebox::platform::page_mgmt::{FixedAddressBehavior, MemoryRegionPermissions};
18+
use litebox::platform::page_mgmt::{
19+
CowAllocationError, FixedAddressBehavior, MemoryRegionPermissions,
20+
};
1821
use litebox::platform::{ImmediatelyWokenUp, RawConstPointer as _};
1922
use litebox::shim::ContinueOperation;
2023
use litebox::utils::{ReinterpretSignedExt, ReinterpretUnsignedExt as _, TruncateExt};
@@ -40,6 +43,9 @@ pub struct LinuxUserland {
4043
reserved_pages: Vec<core::ops::Range<usize>>,
4144
/// The base address of the VDSO.
4245
vdso_address: Option<usize>,
46+
/// CoW-eligible memory regions. Maps start address of the static slice, to the info needed to
47+
/// re-mmap the file.
48+
cow_regions: std::sync::RwLock<std::collections::BTreeMap<usize, CowRegionInfo>>,
4349
}
4450

4551
impl core::fmt::Debug for LinuxUserland {
@@ -48,6 +54,15 @@ impl core::fmt::Debug for LinuxUserland {
4854
}
4955
}
5056

57+
/// Information about a CoW-eligible memory region backed by a file.
58+
#[derive(Debug, Clone)]
59+
struct CowRegionInfo {
60+
/// The path to the backing file on the host filesystem.
61+
file_path: PathBuf,
62+
/// Length in bytes of the registered region. `register_cow_region` sets this from the length
/// of the static slice it is given.
/// NOTE(review): presumably this equals the backing file's length — confirm at call sites.
63+
file_length: usize,
64+
}
65+
5166
const IF_NAMESIZE: usize = 16;
5267
/// Use TUN device
5368
const IFF_TUN: i32 = 0x0001;
@@ -163,10 +178,53 @@ impl LinuxUserland {
163178
seccomp_interception_enabled: std::sync::atomic::AtomicBool::new(false),
164179
reserved_pages,
165180
vdso_address,
181+
cow_regions: std::sync::RwLock::new(std::collections::BTreeMap::new()),
166182
};
167183
Box::leak(Box::new(platform))
168184
}
169185

186+
/// Register a CoW-eligible memory region backed by a file.
187+
///
188+
/// # Panics
189+
///
190+
/// Panics if an overlapping region is already registered.
191+
pub fn register_cow_region(&self, data: &'static [u8], file_path: impl Into<PathBuf>) {
192+
let start = data.as_ptr() as usize;
193+
let info = CowRegionInfo {
194+
file_path: file_path.into(),
195+
file_length: data.len(),
196+
};
197+
198+
let mut regions = self.cow_regions.write().unwrap();
199+
assert!(
200+
regions.range(start..start + data.len()).next().is_none(),
201+
"Attempting to register an overlapping region"
202+
);
203+
let old = regions.insert(start, info);
204+
assert!(old.is_none());
205+
}
206+
207+
/// Look up the file backing a static slice for CoW mapping.
208+
///
209+
/// Returns `Some((file_path, offset_in_file))` if the slice is backed by a registered
210+
/// CoW region, `None` otherwise.
211+
fn lookup_cow_region(&self, source_data: &'static [u8]) -> Option<(PathBuf, usize)> {
212+
let slice_start = source_data.as_ptr() as usize;
213+
let slice_len = source_data.len();
214+
215+
let regions = self.cow_regions.read().unwrap();
216+
217+
if let Some((&region_start, info)) = regions.range(..=slice_start).next_back() {
218+
let region_end = region_start.checked_add(info.file_length).unwrap();
219+
let slice_end = slice_start.checked_add(slice_len).unwrap();
220+
221+
if slice_start >= region_start && slice_end <= region_end {
222+
return Some((info.file_path.clone(), slice_start - region_start));
223+
}
224+
}
225+
None
226+
}
227+
170228
/// Enable seccomp syscall interception on the platform.
171229
///
172230
/// # Panics
@@ -1490,6 +1548,89 @@ impl<const ALIGN: usize> litebox::platform::PageManagementProvider<ALIGN> for Li
14901548
fn reserved_pages(&self) -> impl Iterator<Item = &core::ops::Range<usize>> {
14911549
self.reserved_pages.iter()
14921550
}
1551+
1552+
fn try_allocate_cow_pages(
1553+
&self,
1554+
suggested_start: usize,
1555+
source_data: &'static [u8],
1556+
permissions: MemoryRegionPermissions,
1557+
fixed_address_behavior: FixedAddressBehavior,
1558+
) -> Result<Self::RawMutPointer<u8>, CowAllocationError> {
1559+
let Some((file_path, file_offset)) = self.lookup_cow_region(source_data) else {
1560+
return Err(CowAllocationError::UnsupportedSourceRegion);
1561+
};
1562+
if !file_offset.is_multiple_of(ALIGN) {
1563+
return Err(CowAllocationError::Unaligned);
1564+
}
1565+
1566+
let file_path_cstr =
1567+
std::ffi::CString::new(file_path.as_os_str().as_encoded_bytes()).unwrap();
1568+
// TODO(jb): We should likely be storing pre-opened FDs, right?
1569+
let fd = unsafe {
1570+
syscalls::syscall4(
1571+
syscalls::Sysno::open,
1572+
file_path_cstr.as_ptr() as usize,
1573+
OFlags::RDONLY.bits() as usize,
1574+
0,
1575+
// Unused by the syscall but would be checked by Seccomp filter if enabled.
1576+
syscall_intercept::SYSCALL_ARG_MAGIC,
1577+
)
1578+
};
1579+
let fd = fd.expect("file should remain unchanged on host");
1580+
1581+
let mut flags = MapFlags::MAP_PRIVATE;
1582+
match fixed_address_behavior {
1583+
FixedAddressBehavior::Hint => {}
1584+
FixedAddressBehavior::Replace => flags |= MapFlags::MAP_FIXED,
1585+
FixedAddressBehavior::NoReplace => flags |= MapFlags::MAP_FIXED_NOREPLACE,
1586+
}
1587+
1588+
let result = unsafe {
1589+
syscalls::syscall6(
1590+
{
1591+
#[cfg(target_arch = "x86_64")]
1592+
{
1593+
syscalls::Sysno::mmap
1594+
}
1595+
#[cfg(target_arch = "x86")]
1596+
{
1597+
syscalls::Sysno::mmap2
1598+
}
1599+
},
1600+
suggested_start,
1601+
source_data.len(),
1602+
prot_flags(permissions).bits().reinterpret_as_unsigned() as usize,
1603+
(flags.bits().reinterpret_as_unsigned()
1604+
// This is to ensure it won't be intercepted by Seccomp if enabled.
1605+
| syscall_intercept::MMAP_FLAG_MAGIC) as usize,
1606+
fd,
1607+
{
1608+
#[cfg(target_arch = "x86_64")]
1609+
{
1610+
file_offset
1611+
}
1612+
#[cfg(target_arch = "x86")]
1613+
{
1614+
// mmap2 takes offset in pages, not bytes
1615+
file_offset / ALIGN
1616+
}
1617+
},
1618+
)
1619+
};
1620+
1621+
let _ = unsafe {
1622+
syscalls::syscall2(
1623+
syscalls::Sysno::close,
1624+
fd, // This is to ensure it won't be intercepted by Seccomp if enabled.
1625+
syscall_intercept::SYSCALL_ARG_MAGIC,
1626+
)
1627+
};
1628+
1629+
match result {
1630+
Ok(ptr) => Ok(UserMutPtr::from_usize(ptr)),
1631+
Err(_) => Err(CowAllocationError::InternalFailure),
1632+
}
1633+
}
14931634
}
14941635

14951636
impl litebox::platform::StdioProvider for LinuxUserland {

0 commit comments

Comments
 (0)