Skip to content

Commit 5f30c2e

Browse files
committed
clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw
1 parent 4647a46 commit 5f30c2e

File tree

5 files changed

+233
-206
lines changed

5 files changed

+233
-206
lines changed

clang/include/clang/Basic/LangOptions.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,14 @@ class LangOptions : public LangOptionsBase {
698698
return ConvergentFunctions;
699699
}
700700

701+
/// Return true if atomicrmw operations targeting allocations in private
702+
/// memory are undefined.
703+
bool threadPrivateMemoryAtomicsAreUndefined() const {
704+
// Should be false for OpenMP.
705+
// TODO: Should this be true for SYCL?
706+
return OpenCL || CUDA;
707+
}
708+
701709
/// Return the OpenCL C or C++ version as a VersionTuple.
702710
VersionTuple getOpenCLVersionTuple() const;
703711

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "ABIInfoImpl.h"
1010
#include "TargetInfo.h"
1111
#include "clang/Basic/TargetOptions.h"
12+
#include "llvm/Support/AMDGPUAddrSpace.h"
1213

1314
using namespace clang;
1415
using namespace clang::CodeGen;
@@ -550,6 +551,16 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
550551

551552
void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
552553
CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
554+
555+
if (RMW.getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS &&
556+
CGF.CGM.getLangOpts().threadPrivateMemoryAtomicsAreUndefined()) {
557+
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
558+
llvm::MDNode *ASRange = MDHelper.createRange(
559+
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),
560+
llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1));
561+
RMW.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
562+
}
563+
553564
if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
554565
return;
555566

clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu

Lines changed: 59 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@
2222

2323
__global__ void ffp1(float *p) {
2424
// CHECK-LABEL: @_Z4ffp1Pf
25-
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
26-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
27-
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
28-
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
29-
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}}
30-
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}
31-
32-
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
33-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
34-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
35-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
36-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
37-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
25+
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]]{{$}}
26+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
27+
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
28+
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
29+
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
30+
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
31+
32+
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
33+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
34+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
35+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
36+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
37+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
3838

3939
// SAFE: _Z4ffp1Pf
4040
// SAFE: global_atomic_cmpswap
@@ -62,19 +62,19 @@ __global__ void ffp1(float *p) {
6262

6363
__global__ void ffp2(double *p) {
6464
// CHECK-LABEL: @_Z4ffp2Pd
65-
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
66-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
67-
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
68-
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
69-
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
70-
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
71-
72-
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
73-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
74-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
75-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
76-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
77-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
65+
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
66+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
67+
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
68+
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
69+
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
70+
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
71+
72+
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
73+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
74+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
75+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
76+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
77+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
7878

7979
// SAFE-LABEL: @_Z4ffp2Pd
8080
// SAFE: global_atomic_cmpswap_b64
@@ -102,19 +102,19 @@ __global__ void ffp2(double *p) {
102102
// long double is the same as double for amdgcn.
103103
__global__ void ffp3(long double *p) {
104104
// CHECK-LABEL: @_Z4ffp3Pe
105-
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
106-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
107-
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
108-
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
109-
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
110-
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}
111-
112-
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
113-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
114-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
115-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
116-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
117-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
105+
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
106+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
107+
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
108+
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
109+
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
110+
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
111+
112+
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
113+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
114+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
115+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
116+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
117+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
118118

119119
// SAFE-LABEL: @_Z4ffp3Pe
120120
// SAFE: global_atomic_cmpswap_b64
@@ -139,34 +139,34 @@ __global__ void ffp3(long double *p) {
139139
__device__ double ffp4(double *p, float f) {
140140
// CHECK-LABEL: @_Z4ffp4Pdf
141141
// CHECK: fpext float {{.*}} to double
142-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
143-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
142+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
143+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
144144
return __atomic_fetch_sub(p, f, memory_order_relaxed);
145145
}
146146

147147
__device__ double ffp5(double *p, int i) {
148148
// CHECK-LABEL: @_Z4ffp5Pdi
149149
// CHECK: sitofp i32 {{.*}} to double
150-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
151-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
150+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
151+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
152152
return __atomic_fetch_sub(p, i, memory_order_relaxed);
153153
}
154154

155155
__global__ void ffp6(_Float16 *p) {
156156
// CHECK-LABEL: @_Z4ffp6PDF16
157-
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
158-
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
159-
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
160-
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
161-
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2{{$}}
162-
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2{{$}}
163-
164-
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
165-
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
166-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
167-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
168-
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
169-
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
157+
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
158+
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
159+
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
160+
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
161+
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
162+
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]]{{$}}
163+
164+
// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
165+
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
166+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
167+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
168+
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
169+
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !noalias.addrspace ![[NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
170170

171171
// SAFE: _Z4ffp6PDF16
172172
// SAFE: global_atomic_cmpswap
@@ -190,3 +190,6 @@ __global__ void ffp6(_Float16 *p) {
190190
__hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
191191
__hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
192192
}
193+
194+
// SAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}
195+
// UNSAFEIR: ![[NO_PRIVATE]] = !{i32 5, i32 6}

0 commit comments

Comments
 (0)