Skip to content

Commit ac34fe2

Browse files
committed
improve the performance of building page tables
Signed-off-by: Simon Davies <[email protected]>
1 parent f369076 commit ac34fe2

File tree

1 file changed

+44
-6
lines changed
  • src/hyperlight_host/src/mem

1 file changed

+44
-6
lines changed

src/hyperlight_host/src/mem/mgr.rs

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ use super::ptr::{GuestPtr, RawPtr};
3333
use super::ptr_offset::Offset;
3434
use super::shared_mem::{ExclusiveSharedMemory, GuestSharedMemory, HostSharedMemory, SharedMemory};
3535
use super::shared_mem_snapshot::SharedMemorySnapshot;
36-
use crate::error::HyperlightError::NoMemorySnapshot;
36+
use crate::HyperlightError::NoMemorySnapshot;
3737
use crate::sandbox::SandboxConfiguration;
3838
use crate::sandbox::uninitialized::GuestBlob;
3939
use crate::{HyperlightError, Result, log_then_return, new_error};
@@ -150,11 +150,16 @@ where
150150
let num_pages: usize = mem_size.div_ceil(AMOUNT_OF_MEMORY_PER_PT);
151151

152152
// Create num_pages page tables (PTs), each with 512 page table entries (PTEs)
153+
// Pre-allocate buffer for all page table entries to minimize shared memory writes
154+
let total_ptes = num_pages * 512;
155+
let mut pte_buffer = vec![0u64; total_ptes]; // Pre-allocate u64 buffer directly
156+
let mut cached_region_idx: Option<usize> = None; // Cache for optimized region lookup
157+
let mut pte_index = 0;
158+
153159
for p in 0..num_pages {
154160
for i in 0..512 {
155-
let offset = SandboxMemoryLayout::PT_OFFSET + (p * 4096) + (i * 8);
156161
// Each PTE maps a 4KB page
157-
let flags = match Self::get_page_flags(p, i, regions) {
162+
let flags = match Self::get_page_flags(p, i, regions, &mut cached_region_idx) {
158163
Ok(region_type) => match region_type {
159164
// TODO: We should parse and load the exe according to its sections and then
160165
// set the correct flags rather than just marking the entire binary as executable
@@ -185,22 +190,52 @@ where
185190
Err(_) => 0,
186191
};
187192
let val_to_write = ((p << 21) as u64 | (i << 12) as u64) | flags;
188-
shared_mem.write_u64(offset, val_to_write)?;
193+
// Write u64 directly to buffer - more efficient than converting to bytes
194+
pte_buffer[pte_index] = val_to_write.to_le();
195+
pte_index += 1;
189196
}
190197
}
198+
199+
// Write the entire PTE buffer to shared memory in a single operation
200+
// View the u64 buffer as bytes for the write. SAFETY: `pte_buffer` is a live, fully initialized Vec<u64>, so its `len() * 8` bytes are readable as u8
201+
let pte_bytes = unsafe {
202+
std::slice::from_raw_parts(pte_buffer.as_ptr() as *const u8, pte_buffer.len() * 8)
203+
};
204+
shared_mem.copy_from_slice(pte_bytes, SandboxMemoryLayout::PT_OFFSET)?;
191205
Ok::<(), HyperlightError>(())
192206
})??;
193207

194208
Ok(rsp)
195209
}
196210

211+
/// Returns the type of the region containing the page at PT `p`, entry `i`, caching the matched region index to speed up sequential lookups
197212
fn get_page_flags(
198213
p: usize,
199214
i: usize,
200-
regions: &mut [MemoryRegion],
215+
regions: &[MemoryRegion],
216+
cached_region_idx: &mut Option<usize>,
201217
) -> Result<MemoryRegionType> {
202218
let addr = (p << 21) + (i << 12);
203219

220+
// First check if we're still in the cached region
221+
if let Some(cached_idx) = *cached_region_idx {
222+
if cached_idx < regions.len() && regions[cached_idx].guest_region.contains(&addr) {
223+
return Ok(regions[cached_idx].region_type);
224+
}
225+
}
226+
227+
// If not in cached region, try adjacent regions first (common for sequential access)
228+
if let Some(cached_idx) = *cached_region_idx {
229+
// Check next region
230+
if cached_idx + 1 < regions.len()
231+
&& regions[cached_idx + 1].guest_region.contains(&addr)
232+
{
233+
*cached_region_idx = Some(cached_idx + 1);
234+
return Ok(regions[cached_idx + 1].region_type);
235+
}
236+
}
237+
238+
// Fall back to binary search for non-sequential access
204239
let idx = regions.binary_search_by(|region| {
205240
if region.guest_region.contains(&addr) {
206241
std::cmp::Ordering::Equal
@@ -212,7 +247,10 @@ where
212247
});
213248

214249
match idx {
215-
Ok(index) => Ok(regions[index].region_type),
250+
Ok(index) => {
251+
*cached_region_idx = Some(index);
252+
Ok(regions[index].region_type)
253+
}
216254
Err(_) => Err(new_error!("Could not find region for address: {}", addr)),
217255
}
218256
}

0 commit comments

Comments
 (0)