Skip to content

Commit 7a51dde

Browse files
authored
InferAddressSpaces: Improve handling of instructions with multiple pointer uses (#101922)
The use list iteration worked correctly for the load and store case. The atomic instructions happen to have the pointer value as the last visited operand, but we rejected the instruction as not being a simple use at the first encountered use, before ever reaching the pointer operand. Ignore the use list for the recognized load/store/atomic instructions, and just try to directly replace the known pointer use.
1 parent 57cd100 commit 7a51dde

File tree

2 files changed

+194
-31
lines changed

2 files changed

+194
-31
lines changed

llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,31 +1022,52 @@ bool InferAddressSpacesImpl::updateAddressSpace(
10221022
return true;
10231023
}
10241024

1025-
/// \p returns true if \p U is the pointer operand of a memory instruction with
1026-
/// a single pointer operand that can have its address space changed by simply
1027-
/// mutating the use to a new value. If the memory instruction is volatile,
1028-
/// return true only if the target allows the memory instruction to be volatile
1029-
/// in the new address space.
1030-
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
1031-
Use &U, unsigned AddrSpace) {
1032-
User *Inst = U.getUser();
1033-
unsigned OpNo = U.getOperandNo();
1025+
/// Replace operand \p OpIdx in \p Inst, if the value is the same as \p OldVal
1026+
/// with \p NewVal.
1027+
static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
1028+
Value *OldVal, Value *NewVal) {
1029+
Use &U = Inst->getOperandUse(OpIdx);
1030+
if (U.get() == OldVal) {
1031+
U.set(NewVal);
1032+
return true;
1033+
}
1034+
1035+
return false;
1036+
}
10341037

1038+
template <typename InstrType>
1039+
static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
1040+
InstrType *MemInstr, unsigned AddrSpace,
1041+
Value *OldV, Value *NewV) {
1042+
if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(MemInstr, AddrSpace)) {
1043+
return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(),
1044+
OldV, NewV);
1045+
}
1046+
1047+
return false;
1048+
}
1049+
1050+
/// If \p OldV is used as the pointer operand of a compatible memory operation
1051+
/// \p Inst, replaces the pointer operand with \p NewV.
1052+
///
1053+
/// This covers memory instructions with a single pointer operand that can have
1054+
/// its address space changed by simply mutating the use to a new value.
1055+
///
1056+
/// \returns true if the user replacement was made.
1057+
static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
1058+
User *Inst, unsigned AddrSpace,
1059+
Value *OldV, Value *NewV) {
10351060
if (auto *LI = dyn_cast<LoadInst>(Inst))
1036-
return OpNo == LoadInst::getPointerOperandIndex() &&
1037-
(!LI->isVolatile() || TTI.hasVolatileVariant(LI, AddrSpace));
1061+
return replaceSimplePointerUse(TTI, LI, AddrSpace, OldV, NewV);
10381062

10391063
if (auto *SI = dyn_cast<StoreInst>(Inst))
1040-
return OpNo == StoreInst::getPointerOperandIndex() &&
1041-
(!SI->isVolatile() || TTI.hasVolatileVariant(SI, AddrSpace));
1064+
return replaceSimplePointerUse(TTI, SI, AddrSpace, OldV, NewV);
10421065

10431066
if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
1044-
return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
1045-
(!RMW->isVolatile() || TTI.hasVolatileVariant(RMW, AddrSpace));
1067+
return replaceSimplePointerUse(TTI, RMW, AddrSpace, OldV, NewV);
10461068

10471069
if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
1048-
return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
1049-
(!CmpX->isVolatile() || TTI.hasVolatileVariant(CmpX, AddrSpace));
1070+
return replaceSimplePointerUse(TTI, CmpX, AddrSpace, OldV, NewV);
10501071

10511072
return false;
10521073
}
@@ -1245,14 +1266,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
12451266
// to the next instruction.
12461267
I = skipToNextUser(I, E);
12471268

1248-
if (isSimplePointerUseValidToReplace(
1249-
*TTI, U, V->getType()->getPointerAddressSpace())) {
1250-
// If V is used as the pointer operand of a compatible memory operation,
1251-
// sets the pointer operand to NewV. This replacement does not change
1252-
// the element type, so the resultant load/store is still valid.
1253-
U.set(NewV);
1269+
unsigned AddrSpace = V->getType()->getPointerAddressSpace();
1270+
if (replaceIfSimplePointerUse(*TTI, CurUser, AddrSpace, V, NewV))
12541271
continue;
1255-
}
12561272

12571273
// Skip if the current user is the new value itself.
12581274
if (CurUser == NewV)

llvm/test/Transforms/InferAddressSpaces/AMDGPU/store-pointer-to-self.ll

Lines changed: 154 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,47 @@ define void @store_flat_pointer_to_self() {
2222
ret void
2323
}
2424

25-
; FIXME: Should be able to optimize the pointer operand to flat.
25+
define void @store_volatile_flat_pointer_to_self() {
26+
; CHECK-LABEL: define void @store_volatile_flat_pointer_to_self() {
27+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
28+
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
29+
; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
30+
; CHECK-NEXT: call void @user(ptr [[FLAT]])
31+
; CHECK-NEXT: ret void
32+
;
33+
%alloca = alloca ptr, align 8, addrspace(5)
34+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
35+
store volatile ptr %flat, ptr %flat, align 8
36+
call void @user(ptr %flat)
37+
ret void
38+
}
39+
2640
define ptr @atomicrmw_xchg_flat_pointer_to_self() {
2741
; CHECK-LABEL: define ptr @atomicrmw_xchg_flat_pointer_to_self() {
2842
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
43+
; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
44+
; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw xchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]] seq_cst, align 8
45+
; CHECK-NEXT: call void @user(ptr [[FLAT1]])
46+
; CHECK-NEXT: ret ptr [[XCHG]]
47+
;
48+
%alloca = alloca ptr, align 8, addrspace(5)
49+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
50+
%xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
51+
call void @user(ptr %flat)
52+
ret ptr %xchg
53+
}
54+
55+
define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
56+
; CHECK-LABEL: define ptr @atomicrmw_volatile_xchg_flat_pointer_to_self() {
57+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
2958
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
30-
; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
59+
; CHECK-NEXT: [[XCHG:%.*]] = atomicrmw volatile xchg ptr [[FLAT]], ptr [[FLAT]] seq_cst, align 8
3160
; CHECK-NEXT: call void @user(ptr [[FLAT]])
3261
; CHECK-NEXT: ret ptr [[XCHG]]
3362
;
3463
%alloca = alloca ptr, align 8, addrspace(5)
3564
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
36-
%xchg = atomicrmw xchg ptr %flat, ptr %flat seq_cst, align 8
65+
%xchg = atomicrmw volatile xchg ptr %flat, ptr %flat seq_cst, align 8
3766
call void @user(ptr %flat)
3867
ret ptr %xchg
3968
}
@@ -42,14 +71,46 @@ define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
4271
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_new_to_self(
4372
; CHECK-SAME: ptr [[CMP:%.*]]) {
4473
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
74+
; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
75+
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[CMP]], ptr [[FLAT1]] seq_cst seq_cst, align 8
76+
; CHECK-NEXT: call void @user(ptr [[FLAT1]])
77+
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
78+
;
79+
%alloca = alloca ptr, align 8, addrspace(5)
80+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
81+
%cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
82+
call void @user(ptr %flat)
83+
ret { ptr, i1 } %cmpx
84+
}
85+
86+
define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(ptr %cmp) {
87+
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_volatile_flat_pointer_new_to_self(
88+
; CHECK-SAME: ptr [[CMP:%.*]]) {
89+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
4590
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
46-
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
91+
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
4792
; CHECK-NEXT: call void @user(ptr [[FLAT]])
4893
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
4994
;
5095
%alloca = alloca ptr, align 8, addrspace(5)
5196
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
52-
%cmpx = cmpxchg ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
97+
%cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
98+
call void @user(ptr %flat)
99+
ret { ptr, i1 } %cmpx
100+
}
101+
102+
define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(ptr %cmp) {
103+
; CHECK-LABEL: define { ptr, i1 } @volatile_cmpxchg_flat_pointer_new_to_self(
104+
; CHECK-SAME: ptr [[CMP:%.*]]) {
105+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
106+
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
107+
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg volatile ptr [[FLAT]], ptr [[CMP]], ptr [[FLAT]] seq_cst seq_cst, align 8
108+
; CHECK-NEXT: call void @user(ptr [[FLAT]])
109+
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
110+
;
111+
%alloca = alloca ptr, align 8, addrspace(5)
112+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
113+
%cmpx = cmpxchg volatile ptr %flat, ptr %cmp, ptr %flat seq_cst seq_cst, align 8
53114
call void @user(ptr %flat)
54115
ret { ptr, i1 } %cmpx
55116
}
@@ -58,14 +119,100 @@ define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(ptr %new) {
58119
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_to_self(
59120
; CHECK-SAME: ptr [[NEW:%.*]]) {
60121
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
122+
; CHECK-NEXT: [[FLAT1:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
123+
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT1]], ptr [[NEW]] seq_cst seq_cst, align 8
124+
; CHECK-NEXT: call void @user(ptr [[FLAT1]])
125+
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
126+
;
127+
%alloca = alloca ptr, align 8, addrspace(5)
128+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
129+
%cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
130+
call void @user(ptr %flat)
131+
ret { ptr, i1 } %cmpx
132+
}
133+
134+
define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
135+
; CHECK-LABEL: define { ptr, i1 } @cmpxchg_flat_pointer_cmp_new_self() {
136+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
61137
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
62-
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr [[FLAT]], ptr [[FLAT]], ptr [[NEW]] seq_cst seq_cst, align 8
138+
; CHECK-NEXT: [[CMPX:%.*]] = cmpxchg ptr addrspace(5) [[ALLOCA]], ptr [[FLAT]], ptr [[FLAT]] seq_cst seq_cst, align 8
63139
; CHECK-NEXT: call void @user(ptr [[FLAT]])
64140
; CHECK-NEXT: ret { ptr, i1 } [[CMPX]]
65141
;
66142
%alloca = alloca ptr, align 8, addrspace(5)
67143
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
68-
%cmpx = cmpxchg ptr %flat, ptr %flat, ptr %new seq_cst seq_cst, align 8
144+
%cmpx = cmpxchg ptr %flat, ptr %flat, ptr %flat seq_cst seq_cst, align 8
69145
call void @user(ptr %flat)
70146
ret { ptr, i1 } %cmpx
71147
}
148+
149+
define void @multi_store_flat_pointer_to_self() {
150+
; CHECK-LABEL: define void @multi_store_flat_pointer_to_self() {
151+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
152+
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
153+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
154+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
155+
; CHECK-NEXT: call void @user(ptr [[FLAT]])
156+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
157+
; CHECK-NEXT: store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
158+
; CHECK-NEXT: ret void
159+
;
160+
%alloca = alloca ptr, align 8, addrspace(5)
161+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
162+
store ptr %flat, ptr %flat, align 8
163+
store ptr %flat, ptr %flat, align 8
164+
call void @user(ptr %flat)
165+
store ptr %flat, ptr addrspace(5) %alloca, align 8
166+
store ptr addrspace(5) %alloca, ptr %flat, align 8
167+
ret void
168+
}
169+
170+
define void @mixed_volatile_multi_store_flat_pointer_to_self() {
171+
; CHECK-LABEL: define void @mixed_volatile_multi_store_flat_pointer_to_self() {
172+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca ptr, align 8, addrspace(5)
173+
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
174+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
175+
; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
176+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
177+
; CHECK-NEXT: call void @user(ptr [[FLAT]])
178+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
179+
; CHECK-NEXT: store ptr addrspace(5) [[ALLOCA]], ptr addrspace(5) [[ALLOCA]], align 8
180+
; CHECK-NEXT: store volatile ptr [[FLAT]], ptr [[FLAT]], align 8
181+
; CHECK-NEXT: store ptr [[FLAT]], ptr addrspace(5) [[ALLOCA]], align 8
182+
; CHECK-NEXT: ret void
183+
;
184+
%alloca = alloca ptr, align 8, addrspace(5)
185+
%flat = addrspacecast ptr addrspace(5) %alloca to ptr
186+
store ptr %flat, ptr %flat, align 8
187+
store volatile ptr %flat, ptr %flat, align 8
188+
store ptr %flat, ptr %flat, align 8
189+
call void @user(ptr %flat)
190+
store ptr %flat, ptr addrspace(5) %alloca, align 8
191+
store ptr addrspace(5) %alloca, ptr %flat, align 8
192+
store volatile ptr %flat, ptr %flat, align 8
193+
store ptr %flat, ptr %flat, align 8
194+
ret void
195+
}
196+
197+
define amdgpu_kernel void @uselist_regression_skipped_load(ptr nocapture readonly %Arg, i32 %i) {
198+
; CHECK-LABEL: define amdgpu_kernel void @uselist_regression_skipped_load(
199+
; CHECK-SAME: ptr nocapture readonly [[ARG:%.*]], i32 [[I:%.*]]) {
200+
; CHECK-NEXT: [[ENTRY:.*:]]
201+
; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(1)
202+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]]
203+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
204+
; CHECK-NEXT: [[P2:%.*]] = load volatile ptr, ptr [[TMP0]], align 8
205+
; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
206+
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P2_GLOBAL]], align 4
207+
; CHECK-NEXT: ret void
208+
;
209+
entry:
210+
%Arg.global = addrspacecast ptr %Arg to ptr addrspace(1)
211+
%Arg.flat = addrspacecast ptr addrspace(1) %Arg.global to ptr
212+
%p1 = getelementptr inbounds ptr, ptr %Arg.flat, i32 %i
213+
%p2 = load volatile ptr, ptr %p1, align 8
214+
%p2.global = addrspacecast ptr %p2 to ptr addrspace(1)
215+
%p2.flat = addrspacecast ptr addrspace(1) %p2.global to ptr
216+
store float 0.000000e+00, ptr %p2.flat, align 4
217+
ret void
218+
}

0 commit comments

Comments
 (0)