Skip to content

Commit 3755175

Browse files
committed
[RFC] IR: Define noalias.addrspace metadata
This is intended to solve a problem with lowering atomics in OpenMP and C++ common to AMDGPU and NVPTX. In OpenCL and CUDA, it is undefined behavior for an atomic instruction to modify an object in thread-private memory. In OpenMP, it is defined. Correspondingly, the hardware does not handle this correctly. For AMDGPU, 32-bit atomics work and 64-bit atomics are silently dropped. We therefore need to codegen this by inserting a runtime address space check, performing the private case without atomics, and falling back to issuing the real atomic otherwise. This metadata allows us to avoid this extra check and branch. Handle this by introducing metadata intended to be applied to atomicrmw instructions, indicating that they cannot access the forbidden address space.
1 parent c49a1ae commit 3755175

File tree

6 files changed

+237
-6
lines changed

6 files changed

+237
-6
lines changed

llvm/docs/LangRef.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8021,6 +8021,42 @@ it will contain a list of ids, including the ids of the callsites in the
80218021
full inline sequence, in order from the leaf-most call's id to the outermost
80228022
inlined call.
80238023

8024+
8025+
'``noalias.addrspace``' Metadata
8026+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8027+
8028+
The ``noalias.addrspace`` metadata is used to identify memory
8029+
operations which cannot access a range of address spaces. It is
8030+
attached to memory instructions, including :ref:`atomicrmw
8031+
<i_atomicrmw>`, :ref:`cmpxchg <i_cmpxchg>`, and :ref:`call <i_call>`
8032+
instructions.
8033+
8034+
This follows the same form as :ref:`range metadata <range-metadata>`,
8035+
except the field entries must be of type ``i32``. The values are
8036+
interpreted as the same numeric address spaces that apply to IR values.
8037+
8038+
Example:
8039+
8040+
.. code-block:: llvm
8041+
; %ptr cannot point to an object allocated in addrspace(5)
8042+
%rmw.valid = atomicrmw and ptr %ptr, i64 %value seq_cst, !noalias.addrspace !0
8043+
8044+
; Undefined behavior. The underlying object is allocated in one of the listed
8045+
; address spaces.
8046+
%alloca = alloca i64, addrspace(5)
8047+
%alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
8048+
%rmw.ub = atomicrmw and ptr %alloca.cast, i64 %value seq_cst, !noalias.addrspace !0
8049+
8050+
!0 = !{i32 5, i32 6}
8051+
8052+
8053+
This is intended for use on targets with a notion of generic address
8054+
spaces, which at runtime resolve to different physical memory
8055+
spaces. The interpretation of the address space values is target
8056+
specific. The behavior is undefined if the runtime memory address does
8057+
resolve to an object defined in one of the indicated address spaces.
8058+
8059+
80248060
Module Flags Metadata
80258061
=====================
80268062

llvm/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ Changes to the LLVM IR
5656

5757
* Added ``usub_cond`` and ``usub_sat`` operations to ``atomicrmw``.
5858

59+
* Introduced ``noalias.addrspace`` metadata.
60+
5961
Changes to LLVM infrastructure
6062
------------------------------
6163

llvm/include/llvm/IR/FixedMetadataKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37)
5252
LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
5353
LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
5454
LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
55+
LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)

llvm/lib/IR/Verifier.cpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -516,8 +516,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
516516
void visitFunction(const Function &F);
517517
void visitBasicBlock(BasicBlock &BB);
518518
void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
519-
bool IsAbsoluteSymbol);
519+
bool IsAbsoluteSymbol, bool IsAddrSpaceRange);
520520
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
521+
void visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range, Type *Ty);
521522
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
522523
void visitProfMetadata(Instruction &I, MDNode *MD);
523524
void visitCallStackMetadata(MDNode *MD);
@@ -761,7 +762,7 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
761762
if (const MDNode *AbsoluteSymbol =
762763
GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
763764
verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
764-
true);
765+
true, false);
765766
}
766767
}
767768

@@ -4131,7 +4132,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
41314132
/// Verify !range and !absolute_symbol metadata. These have the same
41324133
/// restrictions, except !absolute_symbol allows the full set.
41334134
void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
4134-
Type *Ty, bool IsAbsoluteSymbol) {
4135+
Type *Ty, bool IsAbsoluteSymbol,
4136+
bool IsAddrSpaceRange) {
41354137
unsigned NumOperands = Range->getNumOperands();
41364138
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
41374139
unsigned NumRanges = NumOperands / 2;
@@ -4148,8 +4150,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41484150

41494151
Check(High->getType() == Low->getType(), "Range pair types must match!",
41504152
&I);
4151-
Check(High->getType() == Ty->getScalarType(),
4152-
"Range types must match instruction type!", &I);
4153+
4154+
if (IsAddrSpaceRange) {
4155+
Check(High->getType()->isIntegerTy(32),
4156+
"noalias.addrspace type must be i32!", &I);
4157+
} else {
4158+
Check(High->getType() == Ty->getScalarType(),
4159+
"Range types must match instruction type!", &I);
4160+
}
41534161

41544162
APInt HighV = High->getValue();
41554163
APInt LowV = Low->getValue();
@@ -4188,7 +4196,14 @@ void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
41884196
void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
41894197
assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
41904198
"precondition violation");
4191-
verifyRangeMetadata(I, Range, Ty, false);
4199+
verifyRangeMetadata(I, Range, Ty, false, false);
4200+
}
4201+
4202+
void Verifier::visitNoaliasAddrspaceMetadata(Instruction &I, MDNode *Range,
4203+
Type *Ty) {
4204+
assert(Range && Range == I.getMetadata(LLVMContext::MD_noalias_addrspace) &&
4205+
"precondition violation");
4206+
verifyRangeMetadata(I, Range, Ty, false, true);
41924207
}
41934208

41944209
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
@@ -5181,6 +5196,13 @@ void Verifier::visitInstruction(Instruction &I) {
51815196
visitRangeMetadata(I, Range, I.getType());
51825197
}
51835198

5199+
if (MDNode *Range = I.getMetadata(LLVMContext::MD_noalias_addrspace)) {
5200+
Check(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicRMWInst>(I) ||
5201+
isa<AtomicCmpXchgInst>(I) || isa<CallInst>(I),
5202+
"noalias.addrspace are only for memory operations!", &I);
5203+
visitNoaliasAddrspaceMetadata(I, Range, I.getType());
5204+
}
5205+
51845206
if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
51855207
Check(isa<LoadInst>(I) || isa<StoreInst>(I),
51865208
"invariant.group metadata is only for loads and stores", &I);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
2+
3+
define i64 @atomicrmw_noalias_addrspace__0_1(ptr %ptr, i64 %val) {
4+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_1(
5+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
6+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META0:![0-9]+]]
7+
; CHECK-NEXT: ret i64 [[RET]]
8+
;
9+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !0
10+
ret i64 %ret
11+
}
12+
13+
define i64 @atomicrmw_noalias_addrspace__0_2(ptr %ptr, i64 %val) {
14+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__0_2(
15+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
16+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META1:![0-9]+]]
17+
; CHECK-NEXT: ret i64 [[RET]]
18+
;
19+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !1
20+
ret i64 %ret
21+
}
22+
23+
define i64 @atomicrmw_noalias_addrspace__1_3(ptr %ptr, i64 %val) {
24+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__1_3(
25+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
26+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META2:![0-9]+]]
27+
; CHECK-NEXT: ret i64 [[RET]]
28+
;
29+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !2
30+
ret i64 %ret
31+
}
32+
33+
define i64 @atomicrmw_noalias_addrspace__multiple_ranges(ptr %ptr, i64 %val) {
34+
; CHECK-LABEL: define i64 @atomicrmw_noalias_addrspace__multiple_ranges(
35+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
36+
; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr [[PTR]], i64 [[VAL]] seq_cst, align 8, !noalias.addrspace [[META3:![0-9]+]]
37+
; CHECK-NEXT: ret i64 [[RET]]
38+
;
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
40+
ret i64 %ret
41+
}
42+
43+
define i64 @load_noalias_addrspace__5_6(ptr %ptr) {
44+
; CHECK-LABEL: define i64 @load_noalias_addrspace__5_6(
45+
; CHECK-SAME: ptr [[PTR:%.*]]) {
46+
; CHECK-NEXT: [[RET:%.*]] = load i64, ptr [[PTR]], align 4, !noalias.addrspace [[META4:![0-9]+]]
47+
; CHECK-NEXT: ret i64 [[RET]]
48+
;
49+
%ret = load i64, ptr %ptr, align 4, !noalias.addrspace !4
50+
ret i64 %ret
51+
}
52+
53+
define void @store_noalias_addrspace__5_6(ptr %ptr, i64 %val) {
54+
; CHECK-LABEL: define void @store_noalias_addrspace__5_6(
55+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]]) {
56+
; CHECK-NEXT: store i64 [[VAL]], ptr [[PTR]], align 4, !noalias.addrspace [[META4]]
57+
; CHECK-NEXT: ret void
58+
;
59+
store i64 %val, ptr %ptr, align 4, !noalias.addrspace !4
60+
ret void
61+
}
62+
63+
define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(ptr %ptr, i64 %val0, i64 %val1) {
64+
; CHECK-LABEL: define { i64, i1 } @cmpxchg_noalias_addrspace__5_6(
65+
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL0:%.*]], i64 [[VAL1:%.*]]) {
66+
; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL0]], i64 [[VAL1]] monotonic monotonic, align 8, !noalias.addrspace [[META4]]
67+
; CHECK-NEXT: ret { i64, i1 } [[RET]]
68+
;
69+
%ret = cmpxchg ptr %ptr, i64 %val0, i64 %val1 monotonic monotonic, align 8, !noalias.addrspace !4
70+
ret { i64, i1 } %ret
71+
}
72+
73+
declare void @foo()
74+
75+
define void @call_noalias_addrspace__5_6(ptr %ptr) {
76+
; CHECK-LABEL: define void @call_noalias_addrspace__5_6(
77+
; CHECK-SAME: ptr [[PTR:%.*]]) {
78+
; CHECK-NEXT: call void @foo(), !noalias.addrspace [[META4]]
79+
; CHECK-NEXT: ret void
80+
;
81+
call void @foo(), !noalias.addrspace !4
82+
ret void
83+
}
84+
85+
define void @call_memcpy_intrinsic_addrspace__5_6(ptr %dst, ptr %src, i64 %size) {
86+
; CHECK-LABEL: define void @call_memcpy_intrinsic_addrspace__5_6(
87+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
88+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false), !noalias.addrspace [[META4]]
89+
; CHECK-NEXT: ret void
90+
;
91+
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false), !noalias.addrspace !4
92+
ret void
93+
}
94+
95+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
96+
97+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
98+
99+
!0 = !{i32 0, i32 1}
100+
!1 = !{i32 0, i32 2}
101+
!2 = !{i32 1, i32 3}
102+
!3 = !{i32 4, i32 6, i32 10, i32 55}
103+
!4 = !{i32 5, i32 6}
104+
;.
105+
; CHECK: [[META0]] = !{i32 0, i32 1}
106+
; CHECK: [[META1]] = !{i32 0, i32 2}
107+
; CHECK: [[META2]] = !{i32 1, i32 3}
108+
; CHECK: [[META3]] = !{i32 4, i32 6, i32 10, i32 55}
109+
; CHECK: [[META4]] = !{i32 5, i32 6}
110+
;.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
2+
3+
; CHECK: It should have at least one range!
4+
; CHECK-NEXT: !0 = !{}
5+
define i64 @noalias_addrspace__empty(ptr %ptr, i64 %val) {
6+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !0
7+
ret i64 %ret
8+
}
9+
10+
; CHECK: Unfinished range!
11+
; CHECK-NEXT: !1 = !{i32 0}
12+
define i64 @noalias_addrspace__single_field(ptr %ptr, i64 %val) {
13+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !1
14+
ret i64 %ret
15+
}
16+
17+
; CHECK: Range must not be empty!
18+
; CHECK-NEXT: !2 = !{i32 0, i32 0}
19+
define i64 @noalias_addrspace__0_0(ptr %ptr, i64 %val) {
20+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !2
21+
ret i64 %ret
22+
}
23+
24+
; CHECK: noalias.addrspace type must be i32!
25+
; CHECK-NEXT: %ret = atomicrmw add ptr %ptr, i64 %val seq_cst, align 8, !noalias.addrspace !3
26+
define i64 @noalias_addrspace__i64(ptr %ptr, i64 %val) {
27+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !3
28+
ret i64 %ret
29+
}
30+
31+
; CHECK: The lower limit must be an integer!
32+
define i64 @noalias_addrspace__fp(ptr %ptr, i64 %val) {
33+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !4
34+
ret i64 %ret
35+
}
36+
37+
; CHECK: The lower limit must be an integer!
38+
define i64 @noalias_addrspace__ptr(ptr %ptr, i64 %val) {
39+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !5
40+
ret i64 %ret
41+
}
42+
43+
; CHECK: The lower limit must be an integer!
44+
define i64 @noalias_addrspace__nonconstant(ptr %ptr, i64 %val) {
45+
%ret = atomicrmw add ptr %ptr, i64 %val seq_cst, !noalias.addrspace !6
46+
ret i64 %ret
47+
}
48+
49+
@gv0 = global i32 0
50+
@gv1 = global i32 1
51+
52+
!0 = !{}
53+
!1 = !{i32 0}
54+
!2 = !{i32 0, i32 0}
55+
!3 = !{i64 1, i64 5}
56+
!4 = !{float 0.0, float 2.0}
57+
!5 = !{ptr null, ptr addrspace(1) null}
58+
!6 = !{i32 ptrtoint (ptr @gv0 to i32), i32 ptrtoint (ptr @gv1 to i32) }
59+
60+

0 commit comments

Comments
 (0)