//! which is supported on the current CPU.
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
//!
- //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
- //! Use the `compiler-rt` intrinsics if you want LSE support.
- //!
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
//!
//! Generate functions for each of the following symbols:
[…]
//! We do something similar, but with macro arguments.
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule

- // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
+ use core::sync::atomic::{AtomicU8, Ordering};
+
+ /// Non-zero if the host supports LSE atomics.
+ static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
+
+ intrinsics! {
+     /// Call this to enable LSE in the outline atomic operations. The caller must
+     /// verify that LSE operations are supported.
+     pub extern "C" fn __rust_enable_lse() {
+         HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
+     }
+ }

/// Translate a byte size to a Rust type.
#[rustfmt::skip]
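As context for reviewers, here is a minimal sketch (not part of this patch) of how a runtime might drive the new `__rust_enable_lse` hook. It assumes libc's `getauxval` and the aarch64 `HWCAP_ATOMICS` bit, the detection mechanism the removed doc comment referred to; `init_outline_atomics` is a hypothetical name used only for illustration.

    // Illustrative caller-side wiring; only `__rust_enable_lse` comes from this patch.
    unsafe extern "C" {
        fn __rust_enable_lse();
    }

    fn init_outline_atomics() {
        // HWCAP_ATOMICS in AT_HWCAP advertises FEAT_LSE on aarch64 Linux (libc constants assumed).
        let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
        if hwcap & (libc::HWCAP_ATOMICS as libc::c_ulong) != 0 {
            // LSE confirmed: flip the flag so the intrinsics take the LSE fast path.
            unsafe { __rust_enable_lse() };
        }
    }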
@@ -45,6 +53,7 @@ macro_rules! reg {
    (2, $num:literal) => { concat!("w", $num) };
    (4, $num:literal) => { concat!("w", $num) };
    (8, $num:literal) => { concat!("x", $num) };
+   (16, $num:literal) => { concat!("x", $num) };
}

/// Given an atomic ordering, translate it to the acquire suffix for the ldxr aarch64 ASM instruction.
@@ -126,6 +135,41 @@ macro_rules! stxp {
    };
}

+ // If supported, perform the requested LSE op and return; otherwise fall through.
+ macro_rules! try_lse_op {
+     ($op:literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ]) => {
+         concat!(
+             ".arch_extension lse; ",
+             "adrp x16, {have_lse}; ",
+             "ldrb w16, [x16, :lo12:{have_lse}]; ",
+             "cbz w16, 8f; ",
+             // LSE_OP s(reg),* [$mem]
+             concat!(lse!($op, $ordering, $bytes), $(" ", reg!($bytes, $reg), ", ",)* "[", stringify!($mem), "]; ",),
+             "ret; ",
+             "8:"
+         )
+     };
+ }
+
+ // Translate a memory ordering to the LSE mnemonic suffix.
+ #[rustfmt::skip]
+ macro_rules! lse_mem_sfx {
+     (Relaxed) => { "" };
+     (Acquire) => { "a" };
+     (Release) => { "l" };
+     (AcqRel) => { "al" };
+ }
+
+ // Generate the aarch64 LSE mnemonic for a memory ordering and operand width.
+ macro_rules! lse {
+     ($op:literal, $order:ident, 16) => {
+         concat!($op, "p", lse_mem_sfx!($order))
+     };
+     ($op:literal, $order:ident, $bytes:tt) => {
+         concat!($op, lse_mem_sfx!($order), size!($bytes))
+     };
+ }
+
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
macro_rules! compare_and_swap {
    ($ordering:ident, $bytes:tt, $name:ident) => {
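To make the fast path concrete, this is roughly what `try_lse_op!("swp", Relaxed, 8, 0, 0, [ x1 ])` pastes into an asm template (a sketch only: it assumes the `size!` helper defined elsewhere in this file yields an empty suffix for 8 bytes, and `{have_lse}` is bound by the `have_lse = sym ...` operand added below):

    .arch_extension lse
    adrp    x16, {have_lse}                 // page address of HAVE_LSE_ATOMICS
    ldrb    w16, [x16, :lo12:{have_lse}]    // load the runtime flag
    cbz     w16, 8f                         // flag still zero: fall through to the LL/SC loop
    swp     x0, x0, [x1]                    // "swp" + lse_mem_sfx!(Relaxed) + size!(8)
    ret
8: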
@@ -137,7 +181,9 @@ macro_rules! compare_and_swap {
            ) -> int_ty!($bytes) {
                // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
                core::arch::naked_asm! {
-                   // UXT s(tmp0), s(0)
+                   // CAS s(0), s(1), [x2]; if LSE supported.
+                   try_lse_op!("cas", $ordering, $bytes, 0, 1, [ x2 ]),
+                   // UXT s(tmp0), s(0)
                    concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
                    // LDXR s(0), [x2]
@@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
                    "cbnz w17, 0b",
                    "1:",
                    "ret",
+                   have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                }
            }
        }
@@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 {
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                core::arch::naked_asm! {
+                   // CASP x0, x1, x2, x3, [x4]; if LSE supported.
+                   try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [ x4 ]),
                    "mov x16, x0",
                    "mov x17, x1",
                    "0:",
@@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 {
                    "cbnz w15, 0b",
                    "1:",
                    "ret",
+                   have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                }
            }
        }
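For the 16-byte case the macro selects the register-pair form: `lse!("cas", Acquire, 16)` expands to `caspa`, so the acquire variant of this intrinsic would try roughly the following before falling back to the existing ldxp/stxp loop (an illustrative expansion, not literal patch output):

    adrp    x16, {have_lse}
    ldrb    w16, [x16, :lo12:{have_lse}]
    cbz     w16, 8f
    caspa   x0, x1, x2, x3, [x4]    // expected pair in x0:x1, desired pair in x2:x3
    ret
8:                                  // existing ldxp/stxp retry loop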
@@ -195,6 +245,8 @@ macro_rules! swap {
                left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                core::arch::naked_asm! {
+                   // SWP s(0), s(0), [x1]; if LSE supported.
+                   try_lse_op!("swp", $ordering, $bytes, 0, 0, [ x1 ]),
                    // mov s(tmp0), s(0)
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
@@ -204,6 +256,7 @@ macro_rules! swap {
                    concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
                    "cbnz w17, 0b",
                    "ret",
+                   have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                }
            }
        }
@@ -212,14 +265,16 @@ macro_rules! swap {

/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
macro_rules! fetch_op {
-   ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
+   ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[unsafe(naked)]
            pub unsafe extern "C" fn $name (
                val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
            ) -> int_ty!($bytes) {
                core::arch::naked_asm! {
+                   // LSE_OP s(0), s(0), [x1]; if LSE supported.
+                   try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [ x1 ]),
                    // mov s(tmp0), s(0)
                    concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
                    "0:",
@@ -231,6 +286,7 @@ macro_rules! fetch_op {
                    concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
                    "cbnz w15, 0b",
                    "ret",
+                   have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
                }
            }
        }
@@ -240,25 +296,25 @@ macro_rules! fetch_op {
// We need a single macro to pass to `foreach_ldadd`.
macro_rules! add {
    ($ordering:ident, $bytes:tt, $name:ident) => {
-       fetch_op! { $ordering, $bytes, $name, "add" }
+       fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
    };
}

macro_rules! and {
    ($ordering:ident, $bytes:tt, $name:ident) => {
-       fetch_op! { $ordering, $bytes, $name, "bic" }
+       fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
    };
}

macro_rules! xor {
    ($ordering:ident, $bytes:tt, $name:ident) => {
-       fetch_op! { $ordering, $bytes, $name, "eor" }
+       fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
    };
}

macro_rules! or {
    ($ordering:ident, $bytes:tt, $name:ident) => {
-       fetch_op! { $ordering, $bytes, $name, "orr" }
+       fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
    };
}
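Taken together, each fetch-op macro now names both spellings of the operation: the ALU instruction used inside the LL/SC retry loop and the LSE mnemonic tried first ("add"/"ldadd", "bic"/"ldclr", "eor"/"ldeor", "orr"/"ldset"). As an illustration, a 4-byte acquire fetch-add would begin roughly as follows; the `__aarch64_ldadd4_acq` symbol name follows the usual outline-atomics naming scheme, and an empty `size!(4)` suffix is assumed:

    // __aarch64_ldadd4_acq(val: w0, ptr: x1) -> w0
    adrp    x16, {have_lse}
    ldrb    w16, [x16, :lo12:{have_lse}]
    cbz     w16, 8f
    ldadda  w0, w0, [x1]            // "ldadd" + "a" (Acquire) + size!(4)
    ret
8:                                  // unchanged ldaxr/add/stxr retry loop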