Skip to content

Commit d749116

Browse files
committed
[Attributor] Take the address space from addrspacecast directly
If the value to be analyzed comes directly from an addrspacecast, we take the source address space directly. This improves the case where, in `AMDGPUPromoteKernelArgumentsPass`, a kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. During the analysis, however, the underlying object is the generic pointer rather than the addrspacecast, so the inferred address space is the generic one, which is not ideal.
1 parent dee058f commit d749116

File tree

3 files changed

+108
-15
lines changed

3 files changed

+108
-15
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12571,8 +12571,35 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1257112571
void initialize(Attributor &A) override {
1257212572
assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
1257312573
"Associated value is not a pointer");
12574-
if (getAssociatedType()->getPointerAddressSpace())
12574+
// If the pointer already has non-flat address space, we assume it is the
12575+
// correct one.
12576+
if (getAssociatedType()->getPointerAddressSpace()) {
12577+
[[maybe_unused]] bool R =
12578+
takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
12579+
assert(R && "the take should happen");
1257512580
indicateOptimisticFixpoint();
12581+
return;
12582+
}
12583+
// If the pointer is an addrspacecast, it has to be from a non-flat to flat.
12584+
// We assume the source address space is the correct one.
12585+
Value *V = &getAssociatedValue();
12586+
if (auto *ASCI = dyn_cast<AddrSpaceCastInst>(V)) {
12587+
assert(ASCI->getDestAddressSpace() == 0 &&
12588+
"The destination address space should be a flat address space");
12589+
[[maybe_unused]] bool R = takeAddressSpace(ASCI->getSrcAddressSpace());
12590+
assert(R && "the take should happen");
12591+
indicateOptimisticFixpoint();
12592+
return;
12593+
}
12594+
if (auto *C = dyn_cast<ConstantExpr>(V)) {
12595+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12596+
[[maybe_unused]] bool R = takeAddressSpace(
12597+
C->getOperand(0)->getType()->getPointerAddressSpace());
12598+
assert(R && "the take should happen");
12599+
indicateOptimisticFixpoint();
12600+
return;
12601+
}
12602+
}
1257612603
}
1257712604

1257812605
ChangeStatus updateImpl(Attributor &A) override {
@@ -12582,6 +12609,23 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1258212609
auto Pred = [&](Value &Obj) {
1258312610
if (isa<UndefValue>(&Obj))
1258412611
return true;
12612+
// If an argument in generic address space has addrspace cast uses, and
12613+
// those casts are same, then we take the dst addrspace.
12614+
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12615+
if (Arg->getType()->getPointerAddressSpace() == 0) {
12616+
unsigned CastAddrSpace = 0;
12617+
for (auto *U : Arg->users()) {
12618+
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12619+
if (!ASCI)
12620+
continue;
12621+
if (CastAddrSpace && CastAddrSpace != ASCI->getDestAddressSpace())
12622+
return false;
12623+
CastAddrSpace = ASCI->getDestAddressSpace();
12624+
}
12625+
if (CastAddrSpace)
12626+
return takeAddressSpace(CastAddrSpace);
12627+
}
12628+
}
1258512629
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1258612630
};
1258712631

@@ -12594,16 +12638,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259412638

1259512639
/// See AbstractAttribute::manifest(...).
1259612640
ChangeStatus manifest(Attributor &A) override {
12597-
Value *AssociatedValue = &getAssociatedValue();
12598-
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
12599-
if (getAddressSpace() == NoAddressSpace ||
12600-
getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
12641+
unsigned NewAS = getAddressSpace();
12642+
12643+
if (NewAS == NoAddressSpace ||
12644+
NewAS == getAssociatedType()->getPointerAddressSpace())
1260112645
return ChangeStatus::UNCHANGED;
1260212646

12647+
Value *AssociatedValue = &getAssociatedValue();
12648+
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
1260312649
PointerType *NewPtrTy =
12604-
PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
12650+
PointerType::get(getAssociatedType()->getContext(), NewAS);
1260512651
bool UseOriginalValue =
12606-
OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace();
12652+
OriginalValue->getType()->getPointerAddressSpace() == NewAS;
1260712653

1260812654
bool Changed = false;
1260912655

@@ -12664,11 +12710,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1266412710
}
1266512711

1266612712
static Value *peelAddrspacecast(Value *V) {
12667-
if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
12668-
return peelAddrspacecast(I->getPointerOperand());
12713+
if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
12714+
assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
12715+
return I->getPointerOperand();
12716+
}
1266912717
if (auto *C = dyn_cast<ConstantExpr>(V))
12670-
if (C->getOpcode() == Instruction::AddrSpaceCast)
12671-
return peelAddrspacecast(C->getOperand(0));
12718+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12719+
assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
12720+
"there should not be AS 0 -> AS X");
12721+
return C->getOperand(0);
12722+
}
1267212723
return V;
1267312724
}
1267412725
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,38 @@ define void @foo(ptr addrspace(3) %val) {
243243
ret void
244244
}
245245

246+
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
247+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
248+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
251+
; CHECK-NEXT: ret void
252+
;
253+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
254+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
255+
store i32 %val, ptr %p.cast.1
256+
ret void
257+
}
258+
259+
define internal void @use_argument_after_promotion(ptr %p, i32 %val) {
260+
; CHECK-LABEL: define internal void @use_argument_after_promotion(
261+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
262+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
264+
; CHECK-NEXT: ret void
265+
;
266+
store i32 %val, ptr %p
267+
ret void
268+
}
269+
270+
define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
271+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
272+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
273+
; CHECK-NEXT: call void @use_argument_after_promotion(ptr [[P]], i32 [[VAL]])
274+
; CHECK-NEXT: ret void
275+
;
276+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
277+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
278+
call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
279+
ret void
280+
}

llvm/test/CodeGen/AMDGPU/addrspacecast.ll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
215215
}
216216

217217
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
218+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
219+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
220+
221+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
218222

219223
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
220-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
221224
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
222-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
225+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
223226
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
224227
%cast = addrspacecast ptr addrspace(3) null to ptr
225228
store volatile i32 7, ptr %cast
@@ -259,10 +262,14 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
259262

260263
; FIXME: Shouldn't need to enable queue ptr
261264
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
265+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
266+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
267+
268+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
269+
262270
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
263-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
264271
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
265-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
272+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
266273
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
267274
%cast = addrspacecast ptr addrspace(5) null to ptr
268275
store volatile i32 7, ptr %cast

0 commit comments

Comments
 (0)