 
 use litebox::mm::linux::{PageFaultError, PageRange, VmFlags, VmemPageFaultHandler};
 use litebox::platform::page_mgmt;
+use litebox::utils::TruncateExt;
 use x86_64::{
     PhysAddr, VirtAddr,
     structures::{
@@ -30,6 +31,49 @@ const FLUSH_TLB: bool = true;
 #[cfg(test)]
 const FLUSH_TLB: bool = false;
 
+/// When flushing more than this many pages, use a full CR3 reload instead of
+/// per-page `invlpg` instructions. This heuristic is from the Linux kernel.
+/// https://elixir.bootlin.com/linux/v6.18.6/source/arch/x86/mm/tlb.c#L1394
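+/// For example, a 33-page (132 KiB) flush issues 33 `invlpg` instructions, while
+/// a 34-page flush falls back to a full CR3 reload.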
+#[cfg(not(test))]
+const TLB_SINGLE_PAGE_FLUSH_CEILING: usize = 33;
+
+/// Flush TLB entries for a contiguous page range.
+///
+/// For small ranges (<= [`TLB_SINGLE_PAGE_FLUSH_CEILING`]) this issues `invlpg` per
+/// page. For larger ranges it reloads CR3 to flush the entire TLB.
+///
+/// TODO(security): Both `invlpg` and CR3 reload are local to the executing core.
+/// Threads of the same process (CLONE_VM) can be scheduled on different cores
+/// sharing the same page table, so a remote core may retain stale TLB entries
+/// after unmap/mprotect. While threads sharing a page table are not themselves
+/// a security boundary, a stale TLB entry can allow access to a physical frame
+/// that has been freed and reallocated to a *different* process, breaking
+/// process isolation. This function should send IPIs to remote cores for a
+/// cross-core TLB shootdown (similar to Linux's `flush_tlb_others` /
+/// `native_flush_tlb_others`). On AMD CPUs with INVLPGB support, use the
+/// `invlpgb`/`tlbsync` instructions instead of IPIs for broadcast invalidation.
+/// On newer Intel CPUs, use Remote Action Requests (RAR) for the same purpose.
+///
+/// Note: OP-TEE TAs only unmap pages when tearing down the entire TA instance
+/// and its sessions, so multi-session TAs sharing a page table are not affected.
+#[cfg(not(test))]
+fn flush_tlb_range(start: Page<Size4KiB>, count: usize) {
+    if count == 0 {
+        return;
+    }
+    if count <= TLB_SINGLE_PAGE_FLUSH_CEILING {
+        let end = start + count as u64;
+        for page in Page::range(start, end) {
+            x86_64::instructions::tlb::flush(page.start_address());
+        }
+    } else {
+        x86_64::instructions::tlb::flush_all();
+    }
+}
+
+#[cfg(test)]
+fn flush_tlb_range(_start: Page<Size4KiB>, _count: usize) {}
+
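+// Illustrative sketch: one possible shape for the cross-core shootdown described
+// in the TODO above. `IpiBackend` is a hypothetical platform hook (no such trait
+// exists in litebox); a real implementation would follow Linux's `flush_tlb_others`,
+// or use INVLPGB/RAR where the hardware supports broadcast invalidation.
+#[allow(dead_code)]
+trait IpiBackend {
+    /// Ask every other online core to invalidate `count` pages starting at
+    /// `start`, returning only after all remote cores have acknowledged.
+    fn shootdown_remote(&self, start: Page<Size4KiB>, count: usize);
+}
+
+#[allow(dead_code)]
+fn flush_tlb_range_all_cores<B: IpiBackend>(backend: &B, start: Page<Size4KiB>, count: usize) {
+    // Invalidate locally first, then wait for every remote core to acknowledge,
+    // so the caller may safely free or reuse the underlying frames afterwards.
+    flush_tlb_range(start, count);
+    backend.shootdown_remote(start, count);
+}
+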
 #[inline]
 fn frame_to_pointer<M: MemoryProvider>(frame: PhysFrame) -> *mut PageTable {
     let virt = M::pa_to_va(frame.start_address());
@@ -102,39 +146,42 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
         UserMutPtr::from_ptr(range.start as *mut u8)
     }
 
-    /// Unmap 4KiB pages from the page table
-    /// Set `dealloc_frames` to `true` to free the corresponding physical frames.
+    /// Unmap a range of 4KiB pages from the page table.
+    ///
+    /// Set `dealloc_frames` to `true` to free the corresponding physical frames. Skip this
+    /// when the frames are managed elsewhere (e.g., VTL0).
     /// Set `flush_tlb` to `true` to flush TLB entries after unmapping (not needed when
     /// the page table is being destroyed).
-    ///
-    /// Note it does not free the allocated frames for page table itself (only those allocated to
-    /// user space).
+    /// Set `clean_up_page_tables` to `true` to free intermediate page-table frames
+    /// (P1/P2/P3) that become empty after unmapping. Skip this when the VA range
+    /// will be reused soon, as the intermediate frames would just be re-allocated.
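+    ///
+    /// # Example
+    ///
+    /// Illustrative call only (`table` and `user_range` are placeholders): unmap a
+    /// user range while the address space stays live, freeing the frames, flushing
+    /// the TLB, and releasing intermediate tables that became empty.
+    ///
+    /// ```ignore
+    /// unsafe { table.unmap_pages(user_range, true, true, true)? };
+    /// ```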
     pub(crate) unsafe fn unmap_pages(
         &self,
         range: PageRange<ALIGN>,
         dealloc_frames: bool,
         flush_tlb: bool,
+        clean_up_page_tables: bool,
     ) -> Result<(), page_mgmt::DeallocationError> {
-        let start_va = VirtAddr::new(range.start as _);
-        let start = Page::<Size4KiB>::from_start_address(start_va)
+        if range.is_empty() {
+            return Ok(());
+        }
+        let start = Page::<Size4KiB>::from_start_address(VirtAddr::new(range.start as _))
             .or(Err(page_mgmt::DeallocationError::Unaligned))?;
-        let end_va = VirtAddr::new(range.end as _);
-        let end = Page::<Size4KiB>::from_start_address(end_va)
+        let end = Page::<Size4KiB>::from_start_address(VirtAddr::new(range.end as _))
             .or(Err(page_mgmt::DeallocationError::Unaligned))?;
         let mut allocator = PageTableAllocator::<M>::new();
 
+        // Note: TLB entries are batch-flushed after all pages are unmapped, consistent
+        // with the Linux kernel's mmu_gather approach.
         // Note this implementation is slow as each page requires a full page table walk.
         // If we have N pages, it will be N times slower.
         let mut inner = self.inner.lock();
         for page in Page::range(start, end) {
             match inner.unmap(page) {
-                Ok((frame, fl)) => {
+                Ok((frame, _)) => {
                     if dealloc_frames {
                         unsafe { allocator.deallocate_frame(frame) };
                     }
-                    if flush_tlb && FLUSH_TLB {
-                        fl.flush();
-                    }
                 }
                 Err(X64UnmapError::PageNotMapped) => {}
                 Err(X64UnmapError::ParentEntryHugePage) => {
@@ -145,6 +192,20 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
                 }
             }
         }
+
+        if flush_tlb {
+            let page_count = (end.start_address() - start.start_address()) / Size4KiB::SIZE;
+            flush_tlb_range(start, page_count.truncate());
+        }
+
+        if clean_up_page_tables {
+            // Safety: all leaf entries in the range have been unmapped above;
+            // the caller guarantees this VA range is no longer in use.
+            unsafe {
+                inner.clean_up_addr_range(Page::range_inclusive(start, end - 1u64), &mut allocator);
+            }
+        }
+
         Ok(())
     }
 
@@ -180,7 +241,7 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             end: user_addr_max,
         };
         // Safety: The caller ensures no references to the unmapped pages exist.
-        let _ = unsafe { self.unmap_pages(user_range, true, false) };
+        let _ = unsafe { self.unmap_pages(user_range, true, false, false) };
 
         // Clean up all empty P1 - P3 tables
         let mut allocator = PageTableAllocator::<M>::new();
@@ -204,18 +265,23 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
         let end: Page<Size4KiB> = Page::from_start_address(VirtAddr::new(old_range.end as u64))
             .or(Err(page_mgmt::RemapError::Unaligned))?;
 
+        // Note: TLB entries are batch-flushed after all pages are remapped, consistent
+        // with the Linux kernel's approach. Both the old (unmapped) addresses and the
+        // new addresses are flushed: the new mappings are not-present → present
+        // transitions, which still require flushing under Hyper-V's virtual TLB
+        // (see the comment in `map_phys_frame_range`).
         // Note this implementation is slow as each page requires three full page table walks.
         // If we have N pages, it will be 3N times slower.
         let mut allocator = PageTableAllocator::<M>::new();
         let mut inner = self.inner.lock();
+        let flush_start = start;
         while start < end {
             match inner.translate(start.start_address()) {
                 TranslateResult::Mapped {
                     frame: _,
                     offset: _,
                     flags,
                 } => match inner.unmap(start) {
-                    Ok((frame, fl)) => {
+                    Ok((frame, _)) => {
                         match unsafe { inner.map_to(new_start, frame, flags, &mut allocator) } {
                             Ok(_) => {}
                             Err(e) => match e {
@@ -230,9 +296,6 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
                                 }
                             },
                         }
-                        if FLUSH_TLB {
-                            fl.flush();
-                        }
                     }
                     Err(X64UnmapError::PageNotMapped) => {
                         unreachable!()
@@ -255,6 +318,16 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             new_start += 1;
         }
 
+        // Flush both the old (now-unmapped) addresses and the new ones; the two ranges
+        // have the same length, so `page_count` is reused for both flushes. The new
+        // mappings are not-present → present transitions, which also require flushing
+        // under Hyper-V's virtual TLB (see the comment in `map_phys_frame_range`).
+        let page_count = (end.start_address() - flush_start.start_address()) / Size4KiB::SIZE;
+        flush_tlb_range(flush_start, page_count.truncate());
+        let new_flush_start =
+            Page::<Size4KiB>::from_start_address(VirtAddr::new(new_range.start as u64))
+                .or(Err(page_mgmt::RemapError::Unaligned))?;
+        flush_tlb_range(new_flush_start, page_count.truncate());
+
         Ok(UserMutPtr::from_ptr(new_range.start as *mut u8))
     }
 
@@ -270,6 +343,8 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             Page::from_start_address(start).or(Err(page_mgmt::PermissionUpdateError::Unaligned))?;
         let end: Page<Size4KiB> = Page::containing_address(end - 1);
 
+        // Note: TLB entries are batch-flushed after all permission updates, consistent
+        // with the Linux kernel's flush_tlb_range approach.
         // TODO: this implementation is slow as each page requires two full page table walks.
         // If we have N pages, it will be 2N times slower.
         let mut inner = self.inner.lock();
@@ -292,11 +367,7 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             match unsafe {
                 inner.update_flags(page, (flags & !Self::MPROTECT_PTE_MASK) | new_flags)
             } {
-                Ok(fl) => {
-                    if FLUSH_TLB {
-                        fl.flush();
-                    }
-                }
+                Ok(_) => {}
                 Err(e) => match e {
                     FlagUpdateError::PageNotMapped => unreachable!(),
                     FlagUpdateError::ParentEntryHugePage => {
@@ -313,6 +384,9 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             }
         }
 
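+        // `end` is the page containing the last byte of the range (inclusive),
+        // hence the "+ 1" below.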
+        let page_count = (end.start_address() - start.start_address()) / Size4KiB::SIZE + 1;
+        flush_tlb_range(start, page_count.truncate());
+
         Ok(())
     }
 
@@ -353,15 +427,26 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             match unsafe {
                 inner.map_to_with_table_flags(page, target_frame, flags, flags, &mut allocator)
             } {
-                Ok(fl) => {
-                    if FLUSH_TLB {
-                        fl.flush();
-                    }
-                }
+                Ok(_) => {}
                 Err(e) => return Err(e),
             }
         }
 
+        // Note: On bare-metal x86, TLB invalidation is not architecturally required
+        // when changing a PTE from not-present to present (Intel SDM §4.10.4.3).
+        // However, Hyper-V's virtual TLB may cache not-present translations, so
+        // the guest must invalidate when transitioning from not-present to present.
+        // See TLFS §11.6.1 "Recommendations for Address Spaces":
+        // "it is recommended that operating systems running as a guest of a
+        // hypervisor invalidate [...] TLB entries that correspond to a transition
+        // from a not-present to a present state".
+        // Without this flush, the hypervisor may serve stale "not-present"
+        // translations, causing spurious page faults.
+        let start =
+            Page::<Size4KiB>::containing_address(M::pa_to_va(frame_range.start.start_address()));
+        let page_count = frame_range.len();
+        flush_tlb_range(start, page_count.truncate());
+
         Ok(M::pa_to_va(frame_range.start.start_address()).as_mut_ptr())
     }
 
@@ -399,29 +484,6 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             .map_err(|_| MapToError::FrameAllocationFailed)?;
         let end_page = start_page + frames.len() as u64;
 
-        // Pre-scan: verify all target VAs are unmapped before modifying any page table entries.
-        // The caller provides freshly allocated VAs from the vmap allocator so collisions
-        // indicate a bug: panic in debug builds, return an error in release builds.
-        for page in Page::range(start_page, end_page) {
-            let va = page.start_address();
-            match inner.translate(va) {
-                TranslateResult::Mapped { frame, .. } => {
-                    let existing_frame =
-                        PhysFrame::<Size4KiB>::containing_address(frame.start_address());
-                    debug_assert!(
-                        false,
-                        "vmap: VA {va:?} is already mapped to {existing_frame:?}"
-                    );
-                    return Err(MapToError::PageAlreadyMapped(existing_frame));
-                }
-                TranslateResult::NotMapped => {}
-                TranslateResult::InvalidFrameAddress(_) => {
-                    debug_assert!(false, "vmap: invalid frame address at VA {va:?}");
-                    return Err(MapToError::FrameAllocationFailed);
-                }
-            }
-        }
-
         let table_flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE;
         for (page, &target_frame) in Page::range(start_page, end_page).zip(frames.iter()) {
             // Note: Since we lock the entire page table for the duration of this function (`self.inner.lock()`),
@@ -437,13 +499,14 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
                     &mut allocator,
                 )
             } {
-                Ok(fl) => {
+                Ok(_) => {
                     mapped_count += 1;
-                    if FLUSH_TLB {
-                        fl.flush();
-                    }
                 }
                 Err(e) => {
+                    debug_assert!(
+                        false,
+                        "vmap: map_to_with_table_flags failed at page {page:?}: {e:?}"
+                    );
                     if mapped_count > 0 {
                         Self::rollback_mapped_pages(
                             &mut inner,
@@ -459,6 +522,10 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
             }
         }
 
+        // See the comment in `map_phys_frame_range`: Hyper-V requires TLB notification
+        // even for not-present → present transitions.
+        flush_tlb_range(start_page, mapped_count);
+
         Ok(base_va.as_mut_ptr())
     }
 
@@ -472,18 +539,19 @@ impl<M: MemoryProvider, const ALIGN: usize> X64PageTable<'_, M, ALIGN> {
         pages: x86_64::structures::paging::page::PageRangeInclusive<Size4KiB>,
         allocator: &mut PageTableAllocator<M>,
     ) {
+        let first_page = pages.start;
+        let mut count: usize = 0;
         for page in pages {
-            if let Ok((_frame, fl)) = inner.unmap(page)
-                && FLUSH_TLB
-            {
-                fl.flush();
+            if inner.unmap(page).is_ok() {
+                count += 1;
             }
         }
 
-        // Free any intermediate page-table frames (P1/P2/P3) that are now
-        // empty after unmapping.
-        // Safety: the vmap VA range is used exclusively by this page table
-        // and all leaf entries have just been unmapped above.
+        flush_tlb_range(first_page, count);
+
+        // Safety: all leaf entries in `pages` have been unmapped above while
+        // holding `self.inner`, so any P1/P2/P3 frames that became empty can
+        // be safely freed.
         unsafe {
             inner.clean_up_addr_range(pages, allocator);
         }