@@ -6,47 +6,56 @@ use paste::paste;
6
6
7
7
/// Emits the PTX instruction `membar.gl;` through inline `asm!`.
///
/// NOTE(review): presumably the device-wide memory barrier, going by the function name;
/// confirm against the PTX ISA entry for `membar`.
#[ gpu_only]
8
8
pub unsafe fn membar_device ( ) {
9
- asm ! ( "membar.gl" ) ;
9
+ asm ! ( "membar.gl; " ) ;
10
10
}
11
11
12
12
/// Emits the PTX instruction `membar.cta;` through inline `asm!`.
///
/// NOTE(review): presumably the thread-block (CTA) level memory barrier, going by the
/// function name; confirm against the PTX ISA entry for `membar`.
#[ gpu_only]
13
13
pub unsafe fn membar_block ( ) {
14
- asm ! ( "membar.cta" ) ;
14
+ asm ! ( "membar.cta; " ) ;
15
15
}
16
16
17
17
/// Emits the PTX instruction `membar.sys;` through inline `asm!`.
///
/// NOTE(review): presumably the system-wide memory barrier, going by the function name;
/// confirm against the PTX ISA entry for `membar`.
#[ gpu_only]
18
18
pub unsafe fn membar_system ( ) {
19
- asm ! ( "membar.sys" ) ;
19
+ asm ! ( "membar.sys; " ) ;
20
20
}
21
21
22
22
#[ gpu_only]
23
23
pub unsafe fn fence_sc_device ( ) {
24
- asm ! ( "fence.sc.gl" ) ;
24
+ asm ! ( "fence.sc.gl; " ) ;
25
25
}
26
26
27
27
/// Emits the PTX instruction `fence.sc.cta;` through inline `asm!`.
///
/// NOTE(review): presumably a sequentially-consistent fence at thread-block (CTA) scope,
/// going by the function name; confirm against the PTX ISA entry for `fence`.
#[ gpu_only]
28
28
pub unsafe fn fence_sc_block ( ) {
29
- asm ! ( "fence.sc.cta" ) ;
29
+ asm ! ( "fence.sc.cta; " ) ;
30
30
}
31
31
32
32
/// Emits the PTX instruction `fence.sc.sys;` through inline `asm!`.
///
/// NOTE(review): presumably a sequentially-consistent fence at system scope, going by the
/// function name; confirm against the PTX ISA entry for `fence`.
#[ gpu_only]
33
33
pub unsafe fn fence_sc_system ( ) {
34
- asm ! ( "fence.sc.sys" ) ;
34
+ asm ! ( "fence.sc.sys; " ) ;
35
35
}
36
36
37
37
#[ gpu_only]
38
38
pub unsafe fn fence_acqrel_device ( ) {
39
- asm ! ( "fence.acq_rel.gl" ) ;
39
+ asm ! ( "fence.acq_rel.gl; " ) ;
40
40
}
41
41
42
42
#[ gpu_only]
43
43
pub unsafe fn fence_acqrel_block ( ) {
44
- asm ! ( "fence.acq_rel.sys" ) ;
44
+ asm ! ( "fence.acq_rel.sys; " ) ;
45
45
}
46
46
47
47
/// Emits the PTX instruction `fence.acq_rel.sys;` through inline `asm!`.
///
/// NOTE(review): presumably an acquire-release fence at system scope, going by the
/// function name; confirm against the PTX ISA entry for `fence`.
#[ gpu_only]
48
48
pub unsafe fn fence_acqrel_system ( ) {
49
- asm ! ( "fence.acq_rel.sys" ) ;
49
+ asm ! ( "fence.acq_rel.sys;" ) ;
50
+ }
51
+
52
/// Expands to the scope suffix (including the leading `.`) spliced into an `ld`/`st`
/// PTX instruction string, or to `""` when the ordering is `volatile`.
///
/// The second argument may be either the Rust-facing scope name used in the generated
/// function names (`device`, `block`, `system`) or an identifier that is already a PTX
/// scope suffix (e.g. `gpu`, `cta`, `sys`). Rust-facing names are translated to their PTX
/// spelling, because `.device`, `.block` and `.system` are not PTX scope qualifiers; any
/// other identifier is stringified unchanged.
macro_rules! load_scope {
    // `volatile` accesses carry no scope qualifier at all.
    (volatile, $scope:ident) => {
        ""
    };
    // Rust-facing scope names -> PTX scope qualifiers.
    ($ordering:ident, device) => {
        ".gpu"
    };
    ($ordering:ident, block) => {
        ".cta"
    };
    ($ordering:ident, system) => {
        ".sys"
    };
    // Anything else is assumed to already be a PTX scope suffix.
    ($ordering:ident, $scope:ident) => {
        concat!(".", stringify!($scope))
    };
}
51
60
52
61
macro_rules! load {
@@ -59,7 +68,7 @@ macro_rules! load {
59
68
pub unsafe fn [ <atomic_load_ $ordering _ $width _ $scope>] ( ptr: * const [ <u $width>] ) -> [ <u $width>] {
60
69
let mut out;
61
70
asm!(
62
- concat!( "ld." , stringify!( $ordering) , "." , stringify !( $scope_asm ) , "." , stringify!( [ <u $width>] ) , "{}, [{}]" ) ,
71
+ concat!( "ld." , stringify!( $ordering) , load_scope !( $ordering , $scope ) , "." , stringify!( [ <u $width>] ) , " {}, [{}]; " ) ,
63
72
out( [ <reg $width>] ) out,
64
73
in( reg64) ptr
65
74
) ;
@@ -105,7 +114,7 @@ macro_rules! store {
105
114
#[ doc = concat!( "Performs a " , stringify!( $ordering) , " atomic store at the " , stringify!( $scope) , " level with a width of " , stringify!( $width) , " bits" ) ]
106
115
pub unsafe fn [ <atomic_store_ $ordering _ $width _ $scope>] ( ptr: * mut [ <u $width>] , val: [ <u $width>] ) {
107
116
asm!(
108
- concat!( "st." , stringify!( $ordering) , "." , stringify !( $scope_asm ) , "." , stringify!( [ <u $width>] ) , "[{}], {}" ) ,
117
+ concat!( "st." , stringify!( $ordering) , load_scope !( $ordering , $scope ) , "." , stringify!( [ <u $width>] ) , " [{}], {}; " ) ,
109
118
in( reg64) ptr,
110
119
in( [ <reg $width>] ) val,
111
120
) ;
@@ -141,6 +150,19 @@ store! {
141
150
volatile, 64 , system, sys,
142
151
}
143
152
153
/// Maps a Rust primitive type name to the type suffix spliced into PTX instruction strings.
///
/// `i32` and `i64` expand to `"s32"` and `"s64"`; every other identifier is stringified
/// unchanged (so `u32` stays `"u32"`, `f64` stays `"f64"`).
#[allow(unused_macros)]
macro_rules! ptx_type {
    (i32) => { "s32" };
    (i64) => { "s64" };
    ($other:ident) => { stringify!($other) };
}
165
+
144
166
#[ allow( unused_macros) ]
145
167
macro_rules! ordering {
146
168
( volatile) => {
@@ -172,7 +194,8 @@ macro_rules! atomic_fetch_op_2_reg {
172
194
"." ,
173
195
stringify!( $op) ,
174
196
"." ,
175
- "{}, [{}]"
197
+ ptx_type!( $type) ,
198
+ " {}, [{}];"
176
199
) ,
177
200
out( [ <reg $width>] ) out,
178
201
in( reg64) ptr,
@@ -359,7 +382,8 @@ macro_rules! atomic_fetch_op_3_reg {
359
382
"." ,
360
383
stringify!( $op) ,
361
384
"." ,
362
- "{}, [{}], {}"
385
+ ptx_type!( $type) ,
386
+ " {}, [{}], {};"
363
387
) ,
364
388
out( [ <reg $width>] ) out,
365
389
in( reg64) ptr,
@@ -1101,7 +1125,8 @@ macro_rules! atomic_fetch_op_4_reg {
1101
1125
"." ,
1102
1126
stringify!( $op) ,
1103
1127
"." ,
1104
- "{}, [{}], {}, {}"
1128
+ ptx_type!( $type) ,
1129
+ " {}, [{}], {}, {};"
1105
1130
) ,
1106
1131
out( [ <reg $width>] ) out,
1107
1132
in( reg64) ptr,
@@ -1227,6 +1252,19 @@ atomic_fetch_op_4_reg! {
1227
1252
volatile, cas, 64 , f64 , system, sys,
1228
1253
}
1229
1254
1255
/// Negates `$val` so that a subtraction can be issued as an addition of the negated operand.
///
/// * `u32` / `u64`: the value is reinterpreted as the same-width signed integer and negated
///   with wrapping semantics; the result type is `i32` / `i64`.
/// * `i32` / `i64`: negated with wrapping semantics.
/// * any other type (e.g. `f32`, `f64`): plain unary `-`.
///
/// Wrapping negation is used for the integer arms because plain `-x` overflows for the
/// minimum signed value (for unsigned inputs: when only the top bit is set), which panics
/// in builds with overflow checks enabled. Adding the wrapped negation is equivalent,
/// modulo 2^N, to subtracting the original value.
#[allow(unused_macros)]
macro_rules! negation {
    (u32, $val:ident) => {{
        ($val as i32).wrapping_neg()
    }};
    (u64, $val:ident) => {{
        ($val as i64).wrapping_neg()
    }};
    (i32, $val:ident) => {{
        $val.wrapping_neg()
    }};
    (i64, $val:ident) => {{
        $val.wrapping_neg()
    }};
    ($type:ty, $val:ident) => {{
        -$val
    }};
}
1267
+
1230
1268
// atomic sub is a little special: nvcc implements it as an atomic add with a negated operand, because PTX
1231
1269
// does not have atom.sub.
1232
1270
macro_rules! atomic_sub {
@@ -1246,11 +1284,12 @@ macro_rules! atomic_sub {
1246
1284
"." ,
1247
1285
"add" ,
1248
1286
"." ,
1249
- "{}, [{}], {}"
1287
+ ptx_type!( $type) ,
1288
+ " {}, [{}], {};"
1250
1289
) ,
1251
1290
out( [ <reg $width>] ) out,
1252
1291
in( reg64) ptr,
1253
- in( [ <reg $width>] ) - ( val as [ <i $width> ] ) ,
1292
+ in( [ <reg $width>] ) negation! ( $type , val ) ,
1254
1293
) ;
1255
1294
out
1256
1295
}
0 commit comments