Skip to content

Commit 8c0964c

Browse files
x86_64::task: FPU, SSE
Signed-off-by: Andy-Python-Programmer <[email protected]>
1 parent d924a19 commit 8c0964c

File tree

4 files changed

+221
-7
lines changed

4 files changed

+221
-7
lines changed

src/aero_kernel/src/arch/x86_64/controlregs.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,73 @@ bitflags::bitflags! {
185185
}
186186
}
187187

188+
bitflags::bitflags! {
    /// Bits of the MXCSR register: the SSE floating-point exception flags,
    /// the matching exception masks, and the rounding/denormal control bits.
    pub struct MxCsr: u32 {
        /// Invalid-operation exception flag (bit 0).
        const INVALID_OPERATION = 0x0001;
        /// Denormal-operand exception flag (bit 1).
        const DENORMAL = 0x0002;
        /// Divide-by-zero exception flag (bit 2).
        const DIVIDE_BY_ZERO = 0x0004;
        /// Overflow exception flag (bit 3).
        const OVERFLOW = 0x0008;
        /// Underflow exception flag (bit 4).
        const UNDERFLOW = 0x0010;
        /// Precision exception flag (bit 5).
        const PRECISION = 0x0020;
        /// Denormals-are-zeros control bit (bit 6).
        const DENORMALS_ARE_ZEROS = 0x0040;
        /// Mask bit for the invalid-operation exception (bit 7).
        const INVALID_OPERATION_MASK = 0x0080;
        /// Mask bit for the denormal-operand exception (bit 8).
        const DENORMAL_MASK = 0x0100;
        /// Mask bit for the divide-by-zero exception (bit 9).
        const DIVIDE_BY_ZERO_MASK = 0x0200;
        /// Mask bit for the overflow exception (bit 10).
        const OVERFLOW_MASK = 0x0400;
        /// Mask bit for the underflow exception (bit 11).
        const UNDERFLOW_MASK = 0x0800;
        /// Mask bit for the precision exception (bit 12).
        const PRECISION_MASK = 0x1000;
        /// Rounding control: round toward negative infinity (bit 13 only).
        const ROUNDING_CONTROL_NEGATIVE = 0x2000;
        /// Rounding control: round toward positive infinity (bit 14 only).
        const ROUNDING_CONTROL_POSITIVE = 0x4000;
        /// Rounding control: round toward zero (bits 13 and 14 both set).
        const ROUNDING_CONTROL_ZERO = 0x6000;
        /// Flush-to-zero control bit (bit 15).
        const FLUSH_TO_ZERO = 0x8000;
    }
}
209+
210+
bitflags::bitflags! {
    /// Configuration flags of the XCR0 extended control register.
    ///
    /// Each flag selects a state component that `XSAVE`/`XRSTOR` manage.
    ///
    /// Pairing rules:
    /// - MPX: [`BNDREG`](XCr0Flags::BNDREG) and [`BNDCSR`](XCr0Flags::BNDCSR) must be
    ///   set/unset simultaneously.
    /// - AVX-512: [`OPMASK`](XCr0Flags::OPMASK), [`ZMM_HI256`](XCr0Flags::ZMM_HI256), and
    ///   [`HI16_ZMM`](XCr0Flags::HI16_ZMM) must be set/unset simultaneously.
    #[repr(transparent)]
    pub struct XCr0Flags: u64 {
        /// x87 FPU state is managed by `XSAVE`/`XRSTOR`.
        ///
        /// Must be set.
        const X87 = 1 << 0;
        /// MXCSR and the XMM registers are managed by `XSAVE`/`XRSTOR`.
        ///
        /// Must be set if [`AVX`](XCr0Flags::AVX) is set.
        const SSE = 1 << 1;
        /// Enables AVX instructions; the upper halves of the AVX registers are
        /// managed by `XSAVE`/`XRSTOR`.
        const AVX = 1 << 2;
        /// Enables MPX instructions; the BND0-BND3 bound registers are managed
        /// by `XSAVE`/`XRSTOR` (Intel Only).
        const BNDREG = 1 << 3;
        /// Enables MPX instructions; the BNDCFGU and BNDSTATUS registers are
        /// managed by `XSAVE`/`XRSTOR` (Intel Only).
        const BNDCSR = 1 << 4;
        /// Enables AVX-512 instructions; the K0-K7 mask registers are managed
        /// by `XSAVE`/`XRSTOR`.
        const OPMASK = 1 << 5;
        /// Enables AVX-512 instructions; the upper halves of the lower ZMM
        /// registers are managed by `XSAVE`/`XRSTOR`.
        const ZMM_HI256 = 1 << 6;
        /// Enables AVX-512 instructions; the upper ZMM registers are managed
        /// by `XSAVE`/`XRSTOR`.
        const HI16_ZMM = 1 << 7;
        /// The PKRU register is managed by `XSAVE`/`XRSTOR`.
        const MPK = 1 << 9;
        /// Enables Lightweight Profiling extensions; LWP state is managed by
        /// `XSAVE`/`XRSTOR` (AMD Only).
        const LWP = 1 << 62;
    }
}
254+
188255
/// Returns the current value of the RFLAGS register.
189256
pub fn read_rflags() -> RFlags {
190257
let value: u64;
@@ -230,6 +297,35 @@ pub fn read_cr0() -> Cr0Flags {
230297
Cr0Flags::from_bits_truncate(value) // Get the flags from the bits.
231298
}
232299

300+
pub fn read_xcr0() -> XCr0Flags {
301+
let (low, high): (u32, u32);
302+
303+
unsafe {
304+
asm!(
305+
"xgetbv",
306+
in("ecx") 0,
307+
out("rax") low, out("rdx") high,
308+
options(nomem, nostack, preserves_flags),
309+
);
310+
}
311+
312+
XCr0Flags::from_bits_truncate((high as u64) << 32 | (low as u64))
313+
}
314+
315+
pub unsafe fn write_xcr0(value: XCr0Flags) {
316+
let low = value.bits() as u32;
317+
let high = (value.bits() >> 32) as u32;
318+
319+
unsafe {
320+
asm!(
321+
"xsetbv",
322+
in("ecx") 0,
323+
in("rax") low, in("rdx") high,
324+
options(nomem, nostack, preserves_flags),
325+
);
326+
}
327+
}
328+
233329
/// Write the given set of CR4 flags.
234330
///
235331
/// ## Safety
@@ -273,3 +369,17 @@ pub fn read_cr2() -> VirtAddr {
273369
VirtAddr::new(value)
274370
}
275371
}
372+
373+
pub fn read_mxcsr() -> MxCsr {
374+
let mut mxcsr: u32 = 0;
375+
unsafe {
376+
asm!("stmxcsr [{}]", in(reg) &mut mxcsr, options(nostack, preserves_flags));
377+
}
378+
MxCsr::from_bits_truncate(mxcsr)
379+
}
380+
381+
pub fn write_mxcsr(value: MxCsr) {
382+
unsafe {
383+
asm!("ldmxcsr [{}]", in(reg) &value, options(nostack, readonly));
384+
}
385+
}

src/aero_kernel/src/arch/x86_64/interrupts/exceptions.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,25 @@ interrupt_exception!(fn protection() => "Protection Fault");
5858
interrupt_exception!(fn fpu_fault() => "FPU floating point fault");
5959
interrupt_exception!(fn alignment_check() => "Alignment check fault");
6060
interrupt_exception!(fn machine_check() => "Machine check fault");
61-
interrupt_exception!(fn simd() => "SIMD floating point fault");
6261
interrupt_exception!(fn virtualization() => "Virtualization fault");
6362
interrupt_exception!(fn security() => "Security exception");
6463

64+
pub fn simd(stack: &mut InterruptErrorStack) {
65+
unwind::prepare_panic();
66+
67+
log::error!("EXCEPTION: SIMD floating point fault");
68+
log::error!("Stack: {:#x?}", stack);
69+
log::error!("MXCSR: {:?}", controlregs::read_mxcsr());
70+
71+
unwind::unwind_stack_trace();
72+
73+
unsafe {
74+
loop {
75+
super::halt();
76+
}
77+
}
78+
}
79+
6580
pub fn invalid_opcode(stack: &mut InterruptErrorStack) {
6681
// Catch SYSENTER on AMD CPUs.
6782
//

src/aero_kernel/src/arch/x86_64/mod.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,17 +235,27 @@ pub fn enable_acpi() {
235235
aml::get_subsystem().enable_acpi(INTERRUPT_CONTROLLER.method() as _);
236236
}
237237

238+
fn enable_xsave() {
239+
use controlregs::XCr0Flags;
240+
241+
// Enable XSAVE and x{get,set}bv
242+
let mut cr4 = controlregs::read_cr4();
243+
cr4.insert(controlregs::Cr4Flags::OSXSAVE);
244+
unsafe { controlregs::write_cr4(cr4) }
245+
246+
let mut xcr0 = controlregs::read_xcr0();
247+
xcr0.insert(XCr0Flags::X87 | XCr0Flags::SSE);
248+
unsafe { controlregs::write_xcr0(xcr0) }
249+
}
250+
238251
pub fn init_cpu() {
239252
unsafe {
240253
// Enable the no-execute page protection feature.
241254
io::wrmsr(io::IA32_EFER, io::rdmsr(io::IA32_EFER) | 1 << 11);
242255

243-
// Check if SSE is supported. SSE support is a requirement for running Aero.
244-
let has_sse = CpuId::new()
245-
.get_feature_info()
246-
.map_or(false, |i| i.has_sse());
256+
let features = CpuId::new().get_feature_info().unwrap();
247257

248-
assert!(has_sse);
258+
assert!(features.has_sse());
249259

250260
{
251261
let mut cr0 = controlregs::read_cr0();
@@ -264,5 +274,8 @@ pub fn init_cpu() {
264274

265275
controlregs::write_cr4(cr4);
266276
}
277+
278+
assert!(features.has_xsave(), "init: xsave not supported!");
279+
enable_xsave();
267280
}
268281
}

src/aero_kernel/src/arch/x86_64/task.rs

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,20 @@
3535
use alloc::alloc::alloc_zeroed;
3636

3737
use aero_syscall::{MMapFlags, MMapProt};
38+
use alloc::boxed::Box;
3839
use alloc::vec::Vec;
40+
use raw_cpuid::CpuId;
3941

4042
use core::alloc::Layout;
4143
use core::ptr::Unique;
4244

43-
use crate::arch::interrupts::InterruptErrorStack;
45+
use crate::arch::controlregs::MxCsr;
4446
use crate::fs::cache::DirCacheItem;
4547
use crate::mem::paging::*;
4648
use crate::syscall::ExecArgs;
4749
use crate::userland::vm::Vm;
4850
use crate::utils::StackHelper;
51+
use crate::{arch::interrupts::InterruptErrorStack, mem::alloc_boxed_buffer};
4952

5053
use super::{controlregs, io};
5154

@@ -121,6 +124,8 @@ pub struct ArchTask {
121124

122125
fs_base: VirtAddr,
123126
gs_base: VirtAddr,
127+
128+
fpu_storage: Option<Box<[u8]>>,
124129
}
125130

126131
impl ArchTask {
@@ -136,6 +141,8 @@ impl ArchTask {
136141

137142
fs_base: VirtAddr::zero(),
138143
gs_base: VirtAddr::zero(),
144+
145+
fpu_storage: None,
139146
}
140147
}
141148

@@ -178,6 +185,8 @@ impl ArchTask {
178185

179186
fs_base: VirtAddr::zero(),
180187
gs_base: VirtAddr::zero(),
188+
189+
fpu_storage: None,
181190
}
182191
}
183192

@@ -221,6 +230,9 @@ impl ArchTask {
221230
context.rip = fork_init as _;
222231
context.cr3 = address_space.cr3().start_address().as_u64();
223232

233+
let mut fpu_storage = alloc_boxed_buffer::<u8>(xsave_size() as usize);
234+
fpu_storage.copy_from_slice(self.fpu_storage.as_ref().unwrap());
235+
224236
Ok(Self {
225237
context: unsafe { Unique::new_unchecked(context) },
226238
context_switch_rsp: VirtAddr::new(switch_stack as u64),
@@ -230,6 +242,8 @@ impl ArchTask {
230242
// The FS and GS bases are inherited from the parent process.
231243
fs_base: self.fs_base.clone(),
232244
gs_base: self.gs_base.clone(),
245+
246+
fpu_storage: Some(fpu_storage),
233247
})
234248
}
235249

@@ -272,6 +286,9 @@ impl ArchTask {
272286
context.rip = fork_init as u64;
273287
context.cr3 = new_address_space.cr3().start_address().as_u64();
274288

289+
let mut fpu_storage = alloc_boxed_buffer::<u8>(xsave_size() as usize);
290+
fpu_storage.copy_from_slice(self.fpu_storage.as_ref().unwrap());
291+
275292
Ok(Self {
276293
context: unsafe { Unique::new_unchecked(context) },
277294
context_switch_rsp: VirtAddr::new(switch_stack as u64),
@@ -281,6 +298,8 @@ impl ArchTask {
281298
// The FS and GS bases are inherited from the parent process.
282299
fs_base: self.fs_base.clone(),
283300
gs_base: self.gs_base.clone(),
301+
302+
fpu_storage: Some(fpu_storage),
284303
})
285304
}
286305

@@ -324,6 +343,33 @@ impl ArchTask {
324343
self.fs_base = VirtAddr::zero();
325344
self.gs_base = VirtAddr::zero();
326345

346+
let mut fpu_storage = alloc_boxed_buffer::<u8>(xsave_size() as usize);
347+
348+
unsafe {
349+
xrstor(&fpu_storage);
350+
351+
// The x87 FPU control word is set to 0x37f (default), which masks all
352+
// floating-point exceptions, sets rounding to nearest, and sets the x87
353+
// FPU precision to 64 bits (as documented in Intel SDM volume 1 section
354+
// 8.1.5).
355+
const DEFAULT_FPU_CWORD: u16 = 0x37f;
356+
asm!("fldcw [{}]", in(reg) &DEFAULT_FPU_CWORD, options(nomem));
357+
358+
// Set the default MXCSR value at reset as documented in Intel SDM volume 2A.
359+
controlregs::write_mxcsr(
360+
MxCsr::INVALID_OPERATION_MASK
361+
| MxCsr::DENORMAL_MASK
362+
| MxCsr::DIVIDE_BY_ZERO_MASK
363+
| MxCsr::OVERFLOW_MASK
364+
| MxCsr::UNDERFLOW_MASK
365+
| MxCsr::PRECISION_MASK,
366+
);
367+
368+
xsave(&mut fpu_storage);
369+
}
370+
371+
self.fpu_storage = Some(fpu_storage);
372+
327373
extern "C" {
328374
fn jump_userland_exec(stack: VirtAddr, rip: VirtAddr, rflags: u64);
329375
}
@@ -473,6 +519,28 @@ impl ArchTask {
473519
}
474520
}
475521

522+
fn xsave_size() -> u32 {
523+
static XSAVE_SIZE: Option<u32> = None;
524+
XSAVE_SIZE.unwrap_or_else(|| {
525+
CpuId::new()
526+
.get_extended_state_info()
527+
.expect("xsave: cpuid extended state info unavailable")
528+
.xsave_size()
529+
})
530+
}
531+
532+
/// Saves the current extended processor state into `fpu`.
///
/// `eax` and `edx` are both set to `0xffffffff`, so every state component
/// enabled in XCR0 is requested.
///
/// The buffer must start on a 64-byte boundary and be large enough for the
/// enabled components; `xsave` faults on a misaligned operand.
fn xsave(fpu: &mut Box<[u8]>) {
    // The instruction writes to the buffer, so take a mutable pointer rather
    // than a shared one.
    let area = fpu.as_mut_ptr();

    debug_assert_eq!(
        area as usize % 64,
        0,
        "xsave: save area must be 64-byte aligned"
    );

    unsafe {
        asm!("xsave [{}]", in(reg) area, in("eax") 0xffffffffu32, in("edx") 0xffffffffu32);
    }
}
537+
538+
/// Restores the extended processor state previously stored in `fpu`.
///
/// Both halves of the component mask (`eax` and `edx`) are all-ones, so every
/// state component enabled in XCR0 is requested.
fn xrstor(fpu: &Box<[u8]>) {
    const ALL_COMPONENTS: u32 = u32::MAX;

    let area: *const u8 = fpu.as_ptr();

    unsafe {
        asm!("xrstor [{}]", in(reg) area, in("eax") ALL_COMPONENTS, in("edx") ALL_COMPONENTS);
    }
}
543+
476544
/// Check out the module level documentation for more information.
477545
pub fn arch_task_spinup(from: &mut ArchTask, to: &ArchTask) {
478546
extern "C" {
@@ -491,6 +559,14 @@ pub fn arch_task_spinup(from: &mut ArchTask, to: &ArchTask) {
491559
// update the swap GS target to point to the new GS base.
492560
io::wrmsr(io::IA32_KERNEL_GSBASE, to.gs_base.as_u64());
493561

562+
if let Some(fpu) = from.fpu_storage.as_mut() {
563+
xsave(fpu);
564+
}
565+
566+
if let Some(fpu) = to.fpu_storage.as_ref() {
567+
xrstor(fpu);
568+
}
569+
494570
task_spinup(&mut from.context, to.context.as_ref());
495571
}
496572
}

0 commit comments

Comments
 (0)