Skip to content

Commit 242582b

Browse files
committed
[Attributor] Take the address space from addrspacecast directly
If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal.
1 parent d905b1c commit 242582b

File tree

2 files changed

+131
-14
lines changed

2 files changed

+131
-14
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12583,16 +12583,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1258312583
}
1258412584

1258512585
ChangeStatus updateImpl(Attributor &A) override {
12586+
assert(A.getInfoCache().getFlatAddressSpace().has_value());
12587+
unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
1258612588
uint32_t OldAddressSpace = AssumedAddressSpace;
12587-
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
12588-
DepClassTy::REQUIRED);
12589-
auto Pred = [&](Value &Obj) {
12589+
12590+
auto CheckAddressSpace = [&](Value &Obj) {
1259012591
if (isa<UndefValue>(&Obj))
1259112592
return true;
12593+
// If an argument in flat address space only has addrspace cast uses, and
12594+
// those casts are same, then we take the dst addrspace.
12595+
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12596+
if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
12597+
unsigned CastAddrSpace = FlatAS;
12598+
for (auto *U : Arg->users()) {
12599+
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12600+
if (!ASCI)
12601+
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
12602+
if (CastAddrSpace != FlatAS &&
12603+
CastAddrSpace != ASCI->getDestAddressSpace())
12604+
return false;
12605+
CastAddrSpace = ASCI->getDestAddressSpace();
12606+
}
12607+
if (CastAddrSpace != FlatAS)
12608+
return takeAddressSpace(CastAddrSpace);
12609+
}
12610+
}
1259212611
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1259312612
};
1259412613

12595-
if (!AUO->forallUnderlyingObjects(Pred))
12614+
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
12615+
DepClassTy::REQUIRED);
12616+
if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
1259612617
return indicatePessimisticFixpoint();
1259712618

1259812619
return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12601,17 +12622,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1260112622

1260212623
/// See AbstractAttribute::manifest(...).
1260312624
ChangeStatus manifest(Attributor &A) override {
12604-
if (getAddressSpace() == InvalidAddressSpace ||
12605-
getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
12625+
unsigned NewAS = getAddressSpace();
12626+
12627+
if (NewAS == InvalidAddressSpace ||
12628+
NewAS == getAssociatedType()->getPointerAddressSpace())
1260612629
return ChangeStatus::UNCHANGED;
1260712630

12631+
unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
12632+
1260812633
Value *AssociatedValue = &getAssociatedValue();
12609-
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
12634+
Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
1261012635

1261112636
PointerType *NewPtrTy =
12612-
PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
12637+
PointerType::get(getAssociatedType()->getContext(), NewAS);
1261312638
bool UseOriginalValue =
12614-
OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
12639+
OriginalValue->getType()->getPointerAddressSpace() == NewAS;
1261512640

1261612641
bool Changed = false;
1261712642

@@ -12671,12 +12696,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1267112696
return AssumedAddressSpace == AS;
1267212697
}
1267312698

12674-
static Value *peelAddrspacecast(Value *V) {
12675-
if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
12676-
return peelAddrspacecast(I->getPointerOperand());
12699+
static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
12700+
if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
12701+
assert(I->getSrcAddressSpace() != FlatAS &&
12702+
"there should not be flat AS -> non-flat AS");
12703+
return I->getPointerOperand();
12704+
}
1267712705
if (auto *C = dyn_cast<ConstantExpr>(V))
12678-
if (C->getOpcode() == Instruction::AddrSpaceCast)
12679-
return peelAddrspacecast(C->getOperand(0));
12706+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12707+
assert(C->getOperand(0)->getType()->getPointerAddressSpace() !=
12708+
FlatAS &&
12709+
"there should not be flat AS -> non-flat AS X");
12710+
return C->getOperand(0);
12711+
}
1268012712
return V;
1268112713
}
1268212714
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,88 @@ define void @foo(ptr addrspace(3) %val) {
243243
ret void
244244
}
245245

246+
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
247+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
248+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
251+
; CHECK-NEXT: ret void
252+
;
253+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
254+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
255+
store i32 %val, ptr %p.cast.1
256+
ret void
257+
}
258+
259+
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
260+
; CHECK-LABEL: define internal void @use_argument_after_promotion(
261+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
262+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
264+
; CHECK-NEXT: ret void
265+
;
266+
store i32 %val, ptr %p
267+
ret void
268+
}
269+
270+
define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
271+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
272+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
273+
; CHECK-NEXT: call void @use_argument_after_promotion(ptr [[P]], i32 [[VAL]])
274+
; CHECK-NEXT: ret void
275+
;
276+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
277+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
278+
call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
279+
ret void
280+
}
281+
282+
define void @vec_kernel_argument_promotion_pattern_intra_procedure(<2 x ptr> %p, i32 %val) {
283+
; CHECK-LABEL: define void @vec_kernel_argument_promotion_pattern_intra_procedure(
284+
; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
285+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast <2 x ptr> [[P]] to <2 x ptr addrspace(1)>
286+
; CHECK-NEXT: [[P_CAST_1:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[P_CAST_0]] to <2 x ptr>
287+
; CHECK-NEXT: [[P1:%.*]] = extractelement <2 x ptr> [[P_CAST_1]], i32 0
288+
; CHECK-NEXT: [[P2:%.*]] = extractelement <2 x ptr> [[P_CAST_1]], i32 1
289+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P1]], align 4
290+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P2]], align 4
291+
; CHECK-NEXT: ret void
292+
;
293+
%p.cast.0 = addrspacecast <2 x ptr> %p to <2 x ptr addrspace(1)>
294+
%p.cast.1 = addrspacecast <2 x ptr addrspace(1)> %p.cast.0 to <2 x ptr>
295+
%p1 = extractelement <2 x ptr> %p.cast.1, i32 0
296+
%p2 = extractelement <2 x ptr> %p.cast.1, i32 1
297+
store i32 %val, ptr %p1
298+
store i32 %val, ptr %p2
299+
ret void
300+
}
301+
302+
define internal void @use_vec_argument_after_promotion(<2 x ptr> %p, i32 %val) {
303+
; CHECK-LABEL: define internal void @use_vec_argument_after_promotion(
304+
; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
305+
; CHECK-NEXT: [[P1:%.*]] = extractelement <2 x ptr> [[P]], i32 0
306+
; CHECK-NEXT: [[P2:%.*]] = extractelement <2 x ptr> [[P]], i32 1
307+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P1]], align 4
308+
; CHECK-NEXT: store i32 [[VAL]], ptr [[P2]], align 4
309+
; CHECK-NEXT: ret void
310+
;
311+
%p1 = extractelement <2 x ptr> %p, i32 0
312+
%p2 = extractelement <2 x ptr> %p, i32 1
313+
store i32 %val, ptr %p1
314+
store i32 %val, ptr %p2
315+
ret void
316+
}
317+
318+
define void @vec_kernel_argument_promotion_pattern_inter_procedure(<2 x ptr> %p, i32 %val) {
319+
; CHECK-LABEL: define void @vec_kernel_argument_promotion_pattern_inter_procedure(
320+
; CHECK-SAME: <2 x ptr> [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
321+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast <2 x ptr> [[P]] to <2 x ptr addrspace(1)>
322+
; CHECK-NEXT: [[P_CAST_1:%.*]] = addrspacecast <2 x ptr addrspace(1)> [[P_CAST_0]] to <2 x ptr>
323+
; CHECK-NEXT: call void @use_vec_argument_after_promotion(<2 x ptr> [[P_CAST_1]], i32 [[VAL]])
324+
; CHECK-NEXT: ret void
325+
;
326+
%p.cast.0 = addrspacecast <2 x ptr> %p to <2 x ptr addrspace(1)>
327+
%p.cast.1 = addrspacecast <2 x ptr addrspace(1)> %p.cast.0 to <2 x ptr>
328+
call void @use_vec_argument_after_promotion(<2 x ptr> %p.cast.1, i32 %val)
329+
ret void
330+
}

0 commit comments

Comments
 (0)