|
20 | 20 | #include "Inputs/cuda.h" |
21 | 21 | #include <stdatomic.h> |
22 | 22 |
|
| 23 | +typedef float __attribute__((ext_vector_type(2))) vector_float; |
| 24 | + |
23 | 25 | __global__ void ffp1(float *p) { |
24 | 26 | // CHECK-LABEL: @_Z4ffp1Pf |
25 | 27 | // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}} |
@@ -225,6 +227,55 @@ __global__ void ffp6(_Float16 *p) { |
225 | 227 | __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP); |
226 | 228 | } |
227 | 229 |
|
// Vector floating-point atomics: fetch add/sub/max/min on a <2 x float>
// object, first through the generic __atomic builtins (no explicit scope) and
// then through the __hip_atomic builtins with an explicit agent or workgroup
// scope. The IR directives expect one atomicrmw with align 8 per call, in call
// order. The remaining directives expect eight global_atomic_cmpswap matches
// in both the safe and the unsafe configuration (presumably checked against
// the generated assembly; the RUN lines are not visible from this hunk).
__global__ void ffp7(vector_float *p) {
  // CHECK-LABEL: @_Z4ffp7PDv2_f
  // SAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
  // SAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
  // SAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}

  // UNSAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // The scoped operations pin their syncscope explicitly; without it the
  // wildcard after the vector type would accept any scope (or none at all).
  // UNSAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  // UNSAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

  // SAFE: _Z4ffp7PDv2_f
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap
  // SAFE: global_atomic_cmpswap

  // UNSAFE: _Z4ffp7PDv2_f
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap
  // UNSAFE: global_atomic_cmpswap

  // Generic builtins: no explicit scope argument.
  __atomic_fetch_add(p, {1.0f, 1.0f}, memory_order_relaxed);
  __atomic_fetch_sub(p, {1.0f, 1.0f}, memory_order_relaxed);
  __atomic_fetch_max(p, {1.0f, 1.0f}, memory_order_relaxed);
  __atomic_fetch_min(p, {1.0f, 1.0f}, memory_order_relaxed);
  // HIP builtins: scope alternates agent / workgroup, mirrored by the
  // syncscope expectations above.
  __hip_atomic_fetch_add(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_sub(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
  __hip_atomic_fetch_max(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
  __hip_atomic_fetch_min(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
}
| 278 | + |
228 | 279 | // CHECK-LABEL: @_Z12test_cmpxchgPiii |
229 | 280 | // CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}} |
230 | 281 | // CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}} |
|
0 commit comments