Skip to content

Commit 44adc41

Browse files
kernel: add indirect function support
* Use it to determine the fastest implementation of `memcpy` and `memset`. Signed-off-by: Anhad Singh <[email protected]>
1 parent 23ed2ee commit 44adc41

File tree

6 files changed

+162
-34
lines changed

6 files changed

+162
-34
lines changed

src/aero_kernel/src/arch/x86_64/mem.rs

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,16 @@
1515
// You should have received a copy of the GNU General Public License
1616
// along with Aero. If not, see <https://www.gnu.org/licenses/>.
1717

18-
#[no_mangle]
18+
fn should_store_by_byte() -> bool {
19+
let cpuid = raw_cpuid::CpuId::new();
20+
if let Some(features) = cpuid.get_extended_feature_info() {
21+
// Check if "Enhanced" or "Fast Short" optimizations are available.
22+
features.has_rep_movsb_stosb()
23+
} else {
24+
false
25+
}
26+
}
27+
1928
#[naked]
2029
unsafe extern "C" fn memcpy_movsq(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
2130
// Registers used:
@@ -39,7 +48,33 @@ unsafe extern "C" fn memcpy_movsq(dest: *mut u8, src: *const u8, len: usize) ->
3948
);
4049
}
4150

42-
#[no_mangle]
51+
/// `memcpy` implementation built on a single `rep movsb`.
///
/// Copies `len` bytes from `src` to `dest` and returns the original `dest`
/// pointer. The function is naked: the assembly below is the entire body and
/// ends with its own `ret`.
///
/// NOTE(review): `rep movsb` copies forward only when the direction flag is
/// clear; this presumably relies on the calling convention guaranteeing that on
/// entry -- confirm.
#[naked]
52+
unsafe extern "C" fn memcpy_movsb(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
53+
// Registers used:
54+
//
55+
// %rdi = argument 1, `dest`
56+
// %rsi = argument 2, `src`
57+
// %rdx = argument 3, `len`
58+
asm!(
59+
// Save the return value (`dest`) before `rep movsb` advances rdi.
60+
"mov rax, rdi",
61+
// Copy! `rep movsb` takes its byte count from rcx, so move `len` there first.
62+
"mov rcx, rdx",
63+
"rep movsb",
64+
"ret",
65+
options(noreturn)
66+
)
67+
}
68+
69+
/// Resolver for the `memcpy` symbol.
///
/// Returns `memcpy_movsb` when `should_store_by_byte()` reports `true` and
/// `memcpy_movsq` otherwise. The `#[indirect]` attribute wraps this body in a
/// generated resolver function and exports `memcpy` as a GNU indirect function
/// bound to it.
///
/// NOTE(review): the declared return type omits the `unsafe extern "C"`
/// qualifiers and the `*mut u8` return of the functions actually returned; the
/// `#[indirect]` macro does not read the return type, so this is presumably
/// documentation only -- confirm.
#[indirect]
70+
extern "C" fn memcpy() -> fn(*mut u8, *const u8, usize) {
71+
// Pick the byte-wise variant only when the CPU advertises support for it.
if should_store_by_byte() {
72+
memcpy_movsb
73+
} else {
74+
memcpy_movsq
75+
}
76+
}
77+
4378
#[naked]
4479
unsafe extern "C" fn memset_stosq(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
4580
// Registers used:
@@ -70,6 +105,34 @@ unsafe extern "C" fn memset_stosq(dest: *mut u8, byte: i32, len: usize) -> *mut
70105
)
71106
}
72107

108+
/// `memset` implementation built on a single `rep stosb`.
///
/// Stores the low byte of `byte` into `len` consecutive bytes starting at
/// `dest` and returns the original `dest` pointer. The function is naked: the
/// assembly below is the entire body and ends with its own `ret`.
///
/// NOTE(review): `rep stosb` fills forward only when the direction flag is
/// clear; this presumably relies on the calling convention guaranteeing that on
/// entry -- confirm.
#[naked]
109+
unsafe extern "C" fn memset_stosb(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
110+
// Registers used:
111+
//
112+
// %rdi = argument 1, `dest`
113+
// %rsi = argument 2, `byte`
114+
// %rdx = argument 3, `len`
115+
asm!(
116+
// Save the return value. It is parked in r11 rather than rax because
// `rep stosb` reads the fill byte from al and advances rdi.
117+
"mov r11, rdi",
118+
// al = fill byte (low 8 bits of `byte`).
"mov al, sil",
119+
// rcx = number of bytes to store.
"mov rcx, rdx",
120+
"rep stosb",
121+
// Restore the original `dest` as the return value.
"mov rax, r11",
122+
"ret",
123+
options(noreturn)
124+
)
125+
}
126+
127+
/// Resolver for the `memset` symbol.
///
/// Returns `memset_stosb` when `should_store_by_byte()` reports `true` and
/// `memset_stosq` otherwise. The `#[indirect]` attribute wraps this body in a
/// generated resolver function and exports `memset` as a GNU indirect function
/// bound to it.
///
/// NOTE(review): the declared return type omits the `unsafe extern "C"`
/// qualifiers and the `*mut u8` return of the functions actually returned; the
/// `#[indirect]` macro does not read the return type, so this is presumably
/// documentation only -- confirm.
#[indirect]
128+
extern "C" fn memset() -> fn(*mut u8, i32, usize) {
129+
// Pick the byte-wise variant only when the CPU advertises support for it.
if should_store_by_byte() {
130+
memset_stosb
131+
} else {
132+
memset_stosq
133+
}
134+
}
135+
73136
#[no_mangle]
74137
#[naked]
75138
unsafe extern "C" fn memmove_erms(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
@@ -108,18 +171,6 @@ unsafe extern "C" fn memmove_erms(dest: *mut u8, src: *const u8, len: usize) ->
108171
)
109172
}
110173

111-
// FIXME(andypython): pick the best implementation for the current CPU using indirect functions.
112-
113-
#[no_mangle]
114-
extern "C" fn memcpy(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
115-
unsafe { memcpy_movsq(dest, src, len) }
116-
}
117-
118-
#[no_mangle]
119-
extern "C" fn memset(dest: *mut u8, byte: i32, len: usize) -> *mut u8 {
120-
unsafe { memset_stosq(dest, byte, len) }
121-
}
122-
123174
#[no_mangle]
124175
extern "C" fn memmove(dest: *mut u8, src: *const u8, len: usize) -> *mut u8 {
125176
unsafe { memmove_erms(dest, src, len) }

src/aero_kernel/src/arch/x86_64/mod.rs

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,25 +59,6 @@ static HHDM: HhdmRequest = HhdmRequest::new(0);
5959

6060
#[no_mangle]
6161
extern "C" fn arch_aero_main() -> ! {
62-
unsafe {
63-
core::ptr::read_volatile(STACK.get_response().as_ptr().unwrap());
64-
}
65-
66-
// SAFETY: We have exclusive access to the memory map.
67-
let memmap = MEMMAP
68-
.get_response()
69-
.get_mut()
70-
.expect("limine: invalid memmap response")
71-
.memmap_mut();
72-
73-
unsafe {
74-
interrupts::disable_interrupts();
75-
}
76-
77-
unsafe {
78-
crate::PHYSICAL_MEMORY_OFFSET = VirtAddr::new(HHDM.get_response().get().unwrap().offset);
79-
}
80-
8162
let kernel_file_resp = KERNEL_FILE
8263
.get_response()
8364
.get()
@@ -107,6 +88,27 @@ extern "C" fn arch_aero_main() -> ! {
10788
UnwindInfo::new(elf)
10889
});
10990

91+
crate::relocate_self();
92+
93+
unsafe {
94+
core::ptr::read_volatile(STACK.get_response().as_ptr().unwrap());
95+
}
96+
97+
// SAFETY: We have exclusive access to the memory map.
98+
let memmap = MEMMAP
99+
.get_response()
100+
.get_mut()
101+
.expect("limine: invalid memmap response")
102+
.memmap_mut();
103+
104+
unsafe {
105+
interrupts::disable_interrupts();
106+
}
107+
108+
unsafe {
109+
crate::PHYSICAL_MEMORY_OFFSET = VirtAddr::new(HHDM.get_response().get().unwrap().offset);
110+
}
111+
110112
// Now that we have unwind info, we can initialize the COM ports. This
111113
// will be used to print panic messages/logs before the debug renderer is
112114
// initialized to the serial output (if available).

src/aero_kernel/src/main.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,32 @@ static mut PHYSICAL_MEMORY_OFFSET: VirtAddr = VirtAddr::zero();
112112

113113
const IO_VIRTUAL_BASE: VirtAddr = VirtAddr::new(0xffffff0000000000);
114114

115+
const STT_GNU_IFUNC: u32 = 37;
116+
117+
pub fn relocate_self() {
118+
use xmas_elf::sections::SectionData;
119+
120+
let unwind_info = unwind::UNWIND_INFO.get().unwrap();
121+
let kernel_elf = &unwind_info.kernel_elf;
122+
123+
for section in kernel_elf.section_iter() {
124+
if let Ok(SectionData::Rela64(rela)) = section.get_data(kernel_elf) {
125+
for item in rela {
126+
if !item.get_type() == STT_GNU_IFUNC {
127+
continue;
128+
}
129+
130+
let offset = unsafe { &mut *(item.get_offset() as *mut usize) };
131+
132+
let resolver_ptr = item.get_addend() as *const u8;
133+
let resolver: fn() -> usize = unsafe { core::mem::transmute(resolver_ptr) };
134+
135+
*offset = resolver();
136+
}
137+
}
138+
}
139+
}
140+
115141
fn aero_main() -> ! {
116142
// NOTE: In this function we only want to initialize essential services, including
117143
// the task scheduler. Rest of the initializing (including kernel modules) should go

src/aero_kernel/src/unwind.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ pub fn prepare_panic() {
6060
}
6161

6262
pub struct UnwindInfo {
63-
kernel_elf: ElfFile<'static>,
63+
pub kernel_elf: ElfFile<'static>,
6464
}
6565

6666
impl UnwindInfo {

src/aero_proc/src/indirect.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use proc_macro::TokenStream;
2+
use proc_macro2::{Ident, Span};
3+
4+
/// Expands the `#[indirect]` attribute.
///
/// The annotated item is parsed as a function. Its body becomes the body of a
/// generated resolver `__resolve_<name>` that returns the chosen function as a
/// `usize`. Global assembly then exports `<name>` as a symbol of type
/// `@gnu_indirect_function` bound to that resolver, and an `extern "C"`
/// declaration of `<name>` is emitted alongside it.
///
/// The attribute's own arguments are ignored, and the annotated function's
/// declared return type is not used by the expansion.
pub fn parse(_: TokenStream, item: TokenStream) -> TokenStream {
5+
let item = syn::parse_macro_input!(item as syn::ItemFn);
6+
// Parameter list of the annotated function; reused for the `extern "C"` declaration below.
let args = item.sig.inputs;
7+
8+
// String form of the identifier, needed to build the assembly text and the resolver name.
let name = item.sig.ident.to_string();
9+
10+
// Underscores at the beginning of the identifier make it reserved, and the more underscores
11+
// there are, the more reserveder it is.
12+
let resolve_name = Ident::new(&format!("__resolve_{name}"), Span::call_site());
13+
14+
// Assembly template: `{name}` is filled in here, while the escaped `{{}}` is left as a `{}`
// placeholder for `global_asm!` to substitute with the resolver symbol.
let inline = format!(
15+
r"
16+
.global {name}
17+
18+
.type {name}, @gnu_indirect_function
19+
.set {name},{{}}
20+
"
21+
);
22+
23+
// From here on `name` is the identifier itself (shadowing the string above).
let name = &item.sig.ident;
24+
let resolve_body = &item.block;
25+
26+
quote::quote! {
27+
// Resolver: evaluates the original body and returns the selected function's address.
fn #resolve_name() -> usize {
28+
let resolved_function = {
29+
#resolve_body
30+
};
31+
32+
resolved_function as usize
33+
}
34+
35+
::core::arch::global_asm!(#inline, sym #resolve_name);
36+
37+
// Declaration of the exported symbol, using the annotated function's own parameter list.
extern "C" {
38+
fn #name(#args);
39+
}
40+
}
41+
.into()
42+
}

src/aero_proc/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ extern crate proc_macro_error;
2222

2323
mod cpu_local;
2424
mod downcastable;
25+
mod indirect;
2526
mod syscall;
2627
mod test;
2728

@@ -64,3 +65,9 @@ pub fn downcastable(attr: TokenStream, item: TokenStream) -> TokenStream {
6465
pub fn cpu_local(attr: TokenStream, item: TokenStream) -> TokenStream {
6566
cpu_local::parse(attr, item)
6667
}
68+
69+
/// Attribute macro entry point for `#[indirect]`.
///
/// Forwards the attribute arguments and the annotated item unchanged to
/// `indirect::parse`, which produces the expansion.
#[proc_macro_attribute]
70+
#[proc_macro_error]
71+
pub fn indirect(attr: TokenStream, item: TokenStream) -> TokenStream {
72+
indirect::parse(attr, item)
73+
}

0 commit comments

Comments
 (0)