diff --git a/builtins-test/tests/lse.rs b/builtins-test/tests/lse.rs index 56891be8a..03fe9467a 100644 --- a/builtins-test/tests/lse.rs +++ b/builtins-test/tests/lse.rs @@ -1,30 +1,70 @@ #![feature(decl_macro)] // so we can use pub(super) #![feature(macro_metavar_expr_concat)] -#![cfg(all(target_arch = "aarch64", target_os = "linux"))] +#![cfg(target_arch = "aarch64")] + +use std::sync::Mutex; + +use compiler_builtins::aarch64_outline_atomics::{get_have_lse_atomics, set_have_lse_atomics}; +use compiler_builtins::int::{Int, MinInt}; +use compiler_builtins::{foreach_bytes, foreach_ordering}; + +#[track_caller] +fn with_maybe_lse_atomics(use_lse: bool, f: impl FnOnce()) { + // Ensure tests run in parallel don't interleave global settings + static LOCK: Mutex<()> = Mutex::new(()); + let _g = LOCK.lock().unwrap(); + let old = get_have_lse_atomics(); + // safety: as the caller of the unsafe fn `set_have_lse_atomics`, we + // have to ensure the CPU supports LSE. This is why we make this assertion. + if use_lse || old { + assert!(std::arch::is_aarch64_feature_detected!("lse")); + } + unsafe { set_have_lse_atomics(use_lse) }; + f(); + unsafe { set_have_lse_atomics(old) }; +} + +pub fn run_fuzz_tests_with_lse_variants<I: Int, F: Fn(I, I) + Copy>(n: u32, f: F) +where + <I as MinInt>::Unsigned: Int, +{ + // We use `fuzz_2` because our subject function `f` requires two inputs + let test_fn = || { + builtins_test::fuzz_2(n, f); + }; + // Always run without LSE + with_maybe_lse_atomics(false, test_fn); + + // Conditionally run with LSE + if std::arch::is_aarch64_feature_detected!("lse") { + with_maybe_lse_atomics(true, test_fn); + } +} /// Translate a byte size to a Rust type. 
macro int_ty { - (1) => { i8 }, - (2) => { i16 }, - (4) => { i32 }, - (8) => { i64 }, - (16) => { i128 } + (1) => { u8 }, + (2) => { u16 }, + (4) => { u32 }, + (8) => { u64 }, + (16) => { u128 } } mod cas { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] fn $name() { - builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { + crate::run_fuzz_tests_with_lse_variants(10000, |expected: super::int_ty!($bytes), new| { let mut target = expected.wrapping_add(10); + let ret: super::int_ty!($bytes) = unsafe { + compiler_builtins::aarch64_outline_atomics::$name::$name( + expected, + new, + &mut target, + ) + }; assert_eq!( - unsafe { - compiler_builtins::aarch64_outline_atomics::$name::$name( - expected, - new, - &mut target, - ) - }, + ret, expected.wrapping_add(10), "return value should always be the previous value", ); @@ -35,15 +75,17 @@ mod cas { ); target = expected; + let ret: super::int_ty!($bytes) = unsafe { + compiler_builtins::aarch64_outline_atomics::$name::$name( + expected, + new, + &mut target, + ) + }; assert_eq!( - unsafe { - compiler_builtins::aarch64_outline_atomics::$name::$name( - expected, - new, - &mut target, - ) - }, - expected + ret, + expected, + "the new return value should always be the previous value (i.e. 
the first parameter passed to the function)", ); assert_eq!(target, new, "should have updated target"); }); @@ -59,16 +101,21 @@ mod swap { pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { #[test] fn $name() { - builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { - let orig_right = right; - assert_eq!( - unsafe { - compiler_builtins::aarch64_outline_atomics::$name::$name(left, &mut right) - }, - orig_right - ); - assert_eq!(left, right); - }); + crate::run_fuzz_tests_with_lse_variants( + 10000, + |left: super::int_ty!($bytes), mut right| { + let orig_right = right; + assert_eq!( + unsafe { + compiler_builtins::aarch64_outline_atomics::$name::$name( + left, &mut right, + ) + }, + orig_right + ); + assert_eq!(left, right); + }, + ); } } } @@ -80,7 +127,7 @@ macro_rules! test_op { ($_ordering:ident, $bytes:tt, $name:ident) => { #[test] fn $name() { - builtins_test::fuzz_2(10000, |old, val| { + crate::run_fuzz_tests_with_lse_variants(10000, |old, val| { let mut target = old; let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; let expected = op(old, val); @@ -98,7 +145,6 @@ test_op!(add, |left, right| left.wrapping_add(right)); test_op!(clr, |left, right| left & !right); test_op!(xor, std::ops::BitXor::bitxor); test_op!(or, std::ops::BitOr::bitor); -use compiler_builtins::{foreach_bytes, foreach_ordering}; compiler_builtins::foreach_cas!(cas::test); compiler_builtins::foreach_cas16!(test_cas16); compiler_builtins::foreach_swp!(swap::test); diff --git a/compiler-builtins/src/aarch64_outline_atomics.rs b/compiler-builtins/src/aarch64_outline_atomics.rs index df0cf7650..100b67150 100644 --- a/compiler-builtins/src/aarch64_outline_atomics.rs +++ b/compiler-builtins/src/aarch64_outline_atomics.rs @@ -34,14 +34,27 @@ intrinsics! { } } +/// Function to enable/disable LSE. To be used only for testing purposes. 
+#[cfg(feature = "mangled-names")] +pub unsafe fn set_have_lse_atomics(has_lse: bool) { + let lse_flag = if has_lse { 1 } else { 0 }; + HAVE_LSE_ATOMICS.store(lse_flag, Ordering::Relaxed); +} + +/// Function to obtain whether LSE is enabled or not. To be used only for testing purposes. +#[cfg(feature = "mangled-names")] +pub fn get_have_lse_atomics() -> bool { + HAVE_LSE_ATOMICS.load(Ordering::Relaxed) != 0 +} + /// Translate a byte size to a Rust type. #[rustfmt::skip] macro_rules! int_ty { - (1) => { i8 }; - (2) => { i16 }; - (4) => { i32 }; - (8) => { i64 }; - (16) => { i128 }; + (1) => { u8 }; + (2) => { u16 }; + (4) => { u32 }; + (8) => { u64 }; + (16) => { u128 }; } /// Given a byte size and a register number, return a register of the appropriate size. @@ -135,18 +148,73 @@ macro_rules! stxp { }; } +// The AArch64 assembly syntax for relocation specifiers +// when accessing symbols changes depending on the target executable format. +// In ELF (used in Linux), we have a prefix notation surrounded by colons (:specifier:sym), +// while in Mach-O object files (used in MacOS), a postfix notation is used (sym@specifier). + +/// AArch64 ELF position-independent addressing: +/// +/// adrp xN, symbol +/// add xN, xN, :lo12:symbol +/// +/// The :lo12: modifier selects the low 12 bits of the symbol address +/// and emits an ELF relocation such as R_AARCH64_ADD_ABS_LO12_NC. +/// +/// Defined by the AArch64 ELF psABI. +/// See: <https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst>. +#[cfg(not(target_vendor = "apple"))] +macro_rules! sym { + ($sym:literal) => { + $sym + }; +} + +#[cfg(not(target_vendor = "apple"))] +macro_rules! sym_off { + ($sym:literal) => { + concat!(":lo12:", $sym) + }; +} + +/// Mach-O ARM64 relocation types: +/// ARM64_RELOC_PAGE21 +/// ARM64_RELOC_PAGEOFF12 +/// +/// These relocations implement the @PAGE / @PAGEOFF split used by +/// adrp + add sequences on Apple platforms. 
+/// +/// adrp xN, symbol@PAGE -> ARM64_RELOC_PAGE21 +/// add xN, xN, symbol@PAGEOFF -> ARM64_RELOC_PAGEOFF12 +/// +/// Relocation types defined by Apple in XNU: . +/// See: . +#[cfg(target_vendor = "apple")] +macro_rules! sym { + ($sym:literal) => { + concat!($sym, "@PAGE") + }; +} + +#[cfg(target_vendor = "apple")] +macro_rules! sym_off { + ($sym:literal) => { + concat!($sym, "@PAGEOFF") + }; +} + // If supported, perform the requested LSE op and return, or fallthrough. macro_rules! try_lse_op { ($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => { concat!( - ".arch_extension lse; ", - "adrp x16, {have_lse}; ", - "ldrb w16, [x16, :lo12:{have_lse}]; ", - "cbz w16, 8f; ", + ".arch_extension lse\n", + concat!("adrp x16, ", sym!("{have_lse}"), "\n"), + concat!("ldrb w16, [x16, ", sym_off!("{have_lse}"), "]\n"), + "cbz w16, 8f\n", // LSE_OP s(reg),* [$mem] - concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",), - "ret; ", - "8:" + concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]\n",), + "ret + 8:" ) }; } @@ -203,15 +271,15 @@ macro_rules! compare_and_swap { }; } -// i128 uses a completely different impl, so it has its own macro. -macro_rules! compare_and_swap_i128 { +// u128 uses a completely different impl, so it has its own macro. +macro_rules! compare_and_swap_u128 { ($ordering:ident, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] #[unsafe(naked)] pub unsafe extern "C" fn $name ( - expected: i128, desired: i128, ptr: *mut i128 - ) -> i128 { + expected: u128, desired: u128, ptr: *mut u128 + ) -> u128 { core::arch::naked_asm! { // CASP x0, x1, x2, x3, [x4]; if LSE supported. try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]), @@ -391,7 +459,7 @@ macro_rules! 
foreach_ldset { } foreach_cas!(compare_and_swap); -foreach_cas16!(compare_and_swap_i128); +foreach_cas16!(compare_and_swap_u128); foreach_swp!(swap); foreach_ldadd!(add); foreach_ldclr!(and); diff --git a/compiler-builtins/src/lib.rs b/compiler-builtins/src/lib.rs index 80395a473..a027cd978 100644 --- a/compiler-builtins/src/lib.rs +++ b/compiler-builtins/src/lib.rs @@ -57,7 +57,12 @@ pub mod arm; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] pub mod aarch64; -#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))] +// Note that we enable the module on "mangled-names" because that is the default feature +// in the builtins-test tests. So this is a way of enabling the module during testing. +#[cfg(all( + target_arch = "aarch64", + any(target_feature = "outline-atomics", feature = "mangled-names") +))] pub mod aarch64_outline_atomics; #[cfg(target_arch = "avr")]