Commit e2806aa

page allocator: support allocating pages within an address range (theseus-os#970)
* Currently this is only used for allocating pages for new executable .text sections on aarch64, which itself is a workaround to enable runtime loading of crates (see theseus-os#940).
* Based on the limitations of aarch64's ISA (branch instructions), we reserve 128MiB of virtual address space for this purpose.
* This 128MiB region is for executable .text sections only, and is contiguous with the base kernel image's .text section.
* This is available but not used by default on x86_64 yet.
1 parent e43c4e9 commit e2806aa
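
The diffs below show the new allocation function being used through the `memory` crate's re-exports. A minimal usage sketch, based on the mod_mgmt call site; the allocator's concrete error type is not shown here, so this sketch simply maps it to a `&'static str`:

// Sketch only: `allocate_pages_by_bytes_in_range`, `AllocatedPages`, and `PageRange`
// are all re-exported by the `memory` crate (see kernel/memory/src/lib.rs below).
use memory::{allocate_pages_by_bytes_in_range, AllocatedPages, PageRange};

fn allocate_text_pages(
    size_in_bytes: usize,
    text_range: &PageRange,
) -> Result<AllocatedPages, &'static str> {
    // Request pages that fall entirely within `text_range`, e.g., the 128MiB
    // window reserved for newly-loaded executable .text sections.
    allocate_pages_by_bytes_in_range(size_in_bytes, text_range)
        .map_err(|_| "couldn't allocate pages within the requested range")
}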

5 files changed: +220 −106 lines changed


kernel/kernel_config/src/memory.rs

Lines changed: 12 additions & 15 deletions

@@ -1,4 +1,4 @@
-//! The basic virtual memory map that Theseus assumes.
+//! The basic virtual address ranges (virtual memory map) defined by Theseus.
 //!
 //! Current P4 (top-level page table) mappings:
 //! * 511: kernel text sections.
@@ -53,15 +53,15 @@ pub const TEMPORARY_PAGE_VIRT_ADDR: usize = MAX_VIRTUAL_ADDRESS;
 
 /// Value: 512.
 pub const ENTRIES_PER_PAGE_TABLE: usize = PAGE_SIZE / BYTES_PER_ADDR;
-/// Value: 511. The 511th entry is used for kernel text sections
+/// Value: 511. The 511th entry is used (in part) for kernel text sections.
 pub const KERNEL_TEXT_P4_INDEX: usize = ENTRIES_PER_PAGE_TABLE - 1;
 /// Value: 510. The 510th entry is used to recursively map the current P4 root page table frame
-// such that it can be accessed and modified just like any other level of page table.
+/// such that it can be accessed and modified just like any other level of page table.
 pub const RECURSIVE_P4_INDEX: usize = ENTRIES_PER_PAGE_TABLE - 2;
-/// Value: 509. The 509th entry is used for the kernel heap
+/// Value: 509. The 509th entry is used for the kernel heap.
 pub const KERNEL_HEAP_P4_INDEX: usize = ENTRIES_PER_PAGE_TABLE - 3;
 /// Value: 508. The 508th entry is used to temporarily recursively map the P4 root page table frame
-// of an upcoming (new) page table such that it can be accessed and modified.
+/// of an upcoming (new) page table such that it can be accessed and modified.
 pub const UPCOMING_PAGE_TABLE_RECURSIVE_P4_INDEX: usize = ENTRIES_PER_PAGE_TABLE - 4;
 
 
@@ -89,12 +89,9 @@ pub const KERNEL_OFFSET: usize = canonicalize(MAX_VIRTUAL_ADDRESS - (TWO_GIGABYT
 /// Actual value on x86_64: 0o177777_777_000_000_000_0000, or 0xFFFF_FF80_0000_0000
 pub const KERNEL_TEXT_START: usize = canonicalize(KERNEL_TEXT_P4_INDEX << (P4_INDEX_SHIFT + PAGE_SHIFT));
 
-/// The size in bytes, not in pages.
-///
-/// the KERNEL_OFFSET starts at (MAX_ADDR - 2GiB),
-/// and .text contains nano_core, so this is the
-/// first 510GiB of the 511th P4 entry.
-pub const KERNEL_TEXT_MAX_SIZE: usize = ADDRESSABILITY_PER_P4_ENTRY - TWO_GIGABYTES;
+/// The start of the virtual address range covered by the 510th P4 entry,
+/// i.e., [`RECURSIVE_P4_INDEX`];
+pub const RECURSIVE_P4_START: usize = canonicalize(RECURSIVE_P4_INDEX << (P4_INDEX_SHIFT + PAGE_SHIFT));
 
 /// The higher-half heap gets the 512GB address range starting at the 509th P4 entry,
 /// which is the slot right below the recursive P4 entry (510).
@@ -103,12 +100,12 @@ pub const KERNEL_HEAP_START: usize = canonicalize(KERNEL_HEAP_P4_IN
 
 #[cfg(not(debug_assertions))]
 pub const KERNEL_HEAP_INITIAL_SIZE: usize = 64 * 1024 * 1024; // 64 MiB
-
 #[cfg(debug_assertions)]
 pub const KERNEL_HEAP_INITIAL_SIZE: usize = 256 * 1024 * 1024; // 256 MiB, debug builds require more heap space.
 
-/// the kernel heap gets the whole 509th P4 entry.
+/// The kernel heap is allowed to grow to fill the entirety of its P4 entry.
 pub const KERNEL_HEAP_MAX_SIZE: usize = ADDRESSABILITY_PER_P4_ENTRY;
 
-/// The system (page allocator) must not use addresses at or above this address.
-pub const UPCOMING_PAGE_TABLE_RECURSIVE_MEMORY_START: usize = canonicalize(UPCOMING_PAGE_TABLE_RECURSIVE_P4_INDEX << (P4_INDEX_SHIFT + PAGE_SHIFT));
+/// The start of the virtual address range covered by the 508th P4 entry,
+/// i.e., [`UPCOMING_PAGE_TABLE_RECURSIVE_P4_INDEX`];
+pub const UPCOMING_PAGE_TABLE_RECURSIVE_P4_START: usize = canonicalize(UPCOMING_PAGE_TABLE_RECURSIVE_P4_INDEX << (P4_INDEX_SHIFT + PAGE_SHIFT));
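
Applying the same x86_64 arithmetic used for the `KERNEL_TEXT_START` comment above (each P4 entry spans 2^39 bytes, i.e., 512GiB, so the shift is 39) to the two new constants gives the following derived values:

RECURSIVE_P4_START                     = canonicalize(510 << 39) = 0xFFFF_FF00_0000_0000
UPCOMING_PAGE_TABLE_RECURSIVE_P4_START = canonicalize(508 << 39) = 0xFFFF_FE00_0000_0000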

kernel/memory/src/lib.rs

Lines changed: 2 additions & 9 deletions

@@ -23,15 +23,8 @@ pub use self::paging::{
 };
 
 pub use memory_structs::*;
-pub use page_allocator::{
-    AllocatedPages, allocate_pages, allocate_pages_at,
-    allocate_pages_by_bytes, allocate_pages_by_bytes_at,
-};
-
-pub use frame_allocator::{
-    AllocatedFrames, MemoryRegionType, PhysicalMemoryRegion,
-    allocate_frames, allocate_frames_at, allocate_frames_by_bytes_at, allocate_frames_by_bytes,
-};
+pub use page_allocator::*;
+pub use frame_allocator::*;
 
 #[cfg(target_arch = "x86_64")]
 use memory_x86_64::{ tlb_flush_virt_addr, tlb_flush_all, get_p4, find_section_memory_bounds, get_vga_mem_addr };

kernel/mod_mgmt/src/lib.rs

Lines changed: 62 additions & 25 deletions

@@ -14,7 +14,7 @@ use alloc::{
 };
 use spin::{Mutex, Once};
 use xmas_elf::{ElfFile, sections::{SHF_ALLOC, SHF_EXECINSTR, SHF_TLS, SHF_WRITE, SectionData, ShType}, symbol_table::{Binding, Type}};
-use memory::{MmiRef, MemoryManagementInfo, VirtualAddress, MappedPages, PteFlags, allocate_pages_by_bytes, allocate_frames_by_bytes_at};
+use memory::{MmiRef, MemoryManagementInfo, VirtualAddress, MappedPages, PteFlags, allocate_pages_by_bytes, allocate_frames_by_bytes_at, PageRange, allocate_pages_by_bytes_in_range};
 use bootloader_modules::BootloaderModule;
 use cow_arc::CowArc;
 use rustc_demangle::demangle;
@@ -33,6 +33,7 @@ pub mod parse_nano_core;
 pub mod replace_nano_core_crates;
 mod serde;
 
+
 /// The name of the directory that contains all of the CrateNamespace files.
 pub const NAMESPACES_DIRECTORY_NAME: &str = "namespaces";
 
@@ -2882,6 +2883,35 @@ struct SectionPages {
 }
 
 
+/// The range of virtual addresses from which we allocate pages for executable .text sections.
+///
+/// This is mostly an architecture-specific design choice (hopefully a temporary one):
+/// * On aarch64, even with the large code model, we are not (yet) able to generate
+///   code with branch instructions (call/jump) that can address instructions more than
+///   128 MiB away from the current instruction.
+///   Thus, we restrict the range of .text section locations to ensure they are within 128 MiB.
+///   At some point in the future, this will be a limitation, but not for a long, long time.
+/// * On x86_64, this is not necessary, so the range is `None`.
+pub const KERNEL_TEXT_ADDR_RANGE: Option<PageRange> = {
+    #[cfg(target_arch = "x86_64")] {
+        None
+    }
+    #[cfg(target_arch = "aarch64")] {
+        use {memory::Page, kernel_config::memory::KERNEL_OFFSET};
+
+        const ONE_MIB: usize = 0x10_0000;
+        let start_vaddr = VirtualAddress::new_canonical(KERNEL_OFFSET + ONE_MIB);
+        let end_vaddr = VirtualAddress::new_canonical(start_vaddr.value() + (128 * ONE_MIB) - 1);
+        Some(PageRange::new(
+            // the start of the base kernel image's .text section.
+            Page::containing_address(start_vaddr),
+            // the start of the base kernel image's .text section, plus 128 MiB.
+            Page::containing_address(end_vaddr),
+        ))
+    }
+};
+
+
 /// Allocates and maps memory sufficient to hold the sections that are found in the given `ElfFile`.
 /// Only sections that are marked "allocated" (`ALLOC`) in the ELF object file will contribute to the mappings' sizes.
 fn allocate_section_pages(elf_file: &ElfFile, kernel_mmi_ref: &MmiRef) -> Result<SectionPages, &'static str> {
@@ -2953,10 +2983,37 @@ fn allocate_section_pages(elf_file: &ElfFile, kernel_mmi_ref: &MmiRef) -> Result
     // trace!("\n\texec_bytes: {exec_bytes} {exec_bytes:#X}\n\tro_bytes: {ro_bytes} {ro_bytes:#X}\n\trw_bytes: {rw_bytes} {rw_bytes:#X}");
 
     // Allocate contiguous virtual memory pages for each section and map them to random frames as writable.
-    // We must allocate these pages separately because they will have different flags later.
-    let executable_pages = if exec_bytes > 0 { Some(allocate_and_map_as_writable(exec_bytes, TEXT_SECTION_FLAGS, kernel_mmi_ref)?) } else { None };
-    let read_only_pages = if ro_bytes > 0 { Some(allocate_and_map_as_writable(ro_bytes, RODATA_SECTION_FLAGS, kernel_mmi_ref)?) } else { None };
-    let read_write_pages = if rw_bytes > 0 { Some(allocate_and_map_as_writable(rw_bytes, DATA_BSS_SECTION_FLAGS, kernel_mmi_ref)?) } else { None };
+    // We must allocate these pages separately because they use different flags.
+    let alloc_sec = |size_in_bytes: usize, within_range: Option<&PageRange>, flags: PteFlags| {
+        let allocated_pages = if let Some(range) = within_range {
+            allocate_pages_by_bytes_in_range(size_in_bytes, range)
+                .map_err(|_| "Couldn't allocate pages in text section address range")?
+        } else {
+            allocate_pages_by_bytes(size_in_bytes)
+                .ok_or("Couldn't allocate pages for new section")?
+        };
+
+        kernel_mmi_ref.lock().page_table.map_allocated_pages(
+            allocated_pages,
+            flags.valid(true).writable(true)
+        )
+    };
+
+    let executable_pages = if exec_bytes > 0 {
+        Some(alloc_sec(exec_bytes, KERNEL_TEXT_ADDR_RANGE.as_ref(), TEXT_SECTION_FLAGS)?)
+    } else {
+        None
+    };
+    let read_only_pages = if ro_bytes > 0 {
+        Some(alloc_sec(ro_bytes, None, RODATA_SECTION_FLAGS)?)
+    } else {
+        None
+    };
+    let read_write_pages = if rw_bytes > 0 {
+        Some(alloc_sec(rw_bytes, None, DATA_BSS_SECTION_FLAGS)?)
+    } else {
+        None
+    };
 
     let range_tuple = |mp: MappedPages, size_in_bytes: usize| {
         let start = mp.start_address();
@@ -2971,26 +3028,6 @@ fn allocate_section_pages(elf_file: &ElfFile, kernel_mmi_ref: &MmiRef) -> Result
 }
 
 
-/// A convenience function for allocating virtual pages and mapping them to random physical frames.
-///
-/// The returned `MappedPages` will be at least as large as `size_in_bytes`,
-/// rounded up to the nearest `Page` size,
-/// and is mapped as writable along with the other specified `flags`
-/// to ensure we can copy content into it.
-fn allocate_and_map_as_writable(
-    size_in_bytes: usize,
-    flags: PteFlags,
-    kernel_mmi_ref: &MmiRef,
-) -> Result<MappedPages, &'static str> {
-    let allocated_pages = allocate_pages_by_bytes(size_in_bytes)
-        .ok_or("Couldn't allocate_pages_by_bytes, out of virtual address space")?;
-    kernel_mmi_ref.lock().page_table.map_allocated_pages(
-        allocated_pages,
-        flags.valid(true).writable(true)
-    )
-}
-
-
 #[allow(dead_code)]
 fn dump_dependent_crates(krate: &LoadedCrate, prefix: String) {
     for weak_crate_ref in krate.crates_dependent_on_me() {
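
For context on the 128MiB figure in `KERNEL_TEXT_ADDR_RANGE` above: aarch64's direct branch instructions (`B`/`BL`) encode a signed 26-bit word offset, so the farthest reachable target is ±2^25 × 4 bytes = ±128MiB from the branch itself. Keeping newly-loaded .text sections within a 128MiB window contiguous with the base kernel's .text therefore keeps every direct call and jump target in range.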

kernel/nano_core/linker_higher_half-aarch64.ld

Lines changed: 12 additions & 0 deletions

@@ -27,6 +27,18 @@ SECTIONS {
         *(.text .text.*)
     }
 
+    /*
+     * Currently, we are unable to force aarch64 to emit branch (call/jump) instructions
+     * that are capable of addressing a destination instruction pointer more than 128MiB away,
+     * even when specifying the "large" code model with `-C code-model=large`.
+     *
+     * Thus, as a workaround, we reserve the 128MiB chunk of virtual address space that
+     * directly follows the initial base kernel image's executable .text section,
+     * ensuring it can only be used by the page allocator when allocating pages for
+     * newly-loaded .text sections.
+     */
+    . = ALIGN(128M);
+
     .rodata ALIGN(4K) : AT(ADDR(.rodata) - KERNEL_OFFSET)
     {
         *(.rodata .rodata.*)
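
The `. = ALIGN(128M);` directive advances the linker's location counter to the next 128MiB boundary, so the gap between the end of the base kernel's .text and that boundary contains no other statically-linked sections; per the comment above, that gap is what the page allocator can later hand out for newly-loaded .text sections (the `KERNEL_TEXT_ADDR_RANGE` in mod_mgmt covers it).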
