Skip to content

Commit 3211bfc

Browse files
committed
[Attributor] Take the address space from addrspacecast directly
If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by inserting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal.
1 parent 0909e30 commit 3211bfc

File tree

4 files changed

+112
-25
lines changed

4 files changed

+112
-25
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6249,7 +6249,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
62496249
/// Return the address space of the associated value. \p NoAddressSpace is
62506250
/// returned if the associated value is dead. This function is not supposed
62516251
/// to be called if the AA is invalid.
6252-
virtual int32_t getAddressSpace() const = 0;
6252+
virtual uint32_t getAddressSpace() const = 0;
62536253

62546254
/// Create an abstract attribute view for the position \p IRP.
62556255
static AAAddressSpace &createForPosition(const IRPosition &IRP,
@@ -6268,7 +6268,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
62686268
}
62696269

62706270
// No address space which indicates the associated value is dead.
6271-
static const int32_t NoAddressSpace = -1;
6271+
static const uint32_t NoAddressSpace = ~0U;
62726272

62736273
/// Unique ID (due to the unique address)
62746274
static const char ID;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12562,7 +12562,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1256212562
AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A)
1256312563
: AAAddressSpace(IRP, A) {}
1256412564

12565-
int32_t getAddressSpace() const override {
12565+
uint32_t getAddressSpace() const override {
1256612566
assert(isValidState() && "the AA is invalid");
1256712567
return AssumedAddressSpace;
1256812568
}
@@ -12571,17 +12571,59 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1257112571
void initialize(Attributor &A) override {
1257212572
assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
1257312573
"Associated value is not a pointer");
12574-
if (getAssociatedType()->getPointerAddressSpace())
12574+
// If the pointer already has non-generic address space, we assume it is the
12575+
// correct one.
12576+
if (getAssociatedType()->getPointerAddressSpace()) {
12577+
[[maybe_unused]] bool R =
12578+
takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
12579+
assert(R && "the take should happen");
1257512580
indicateOptimisticFixpoint();
12581+
return;
12582+
}
12583+
// If the pointer is an addrspacecast, we assume the source address space is
12584+
// the correct one.
12585+
Value *V = &getAssociatedValue();
12586+
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
12587+
[[maybe_unused]] bool R = takeAddressSpace(ASC->getSrcAddressSpace());
12588+
assert(R && "the take should happen");
12589+
indicateOptimisticFixpoint();
12590+
return;
12591+
}
12592+
if (auto *C = dyn_cast<ConstantExpr>(V)) {
12593+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12594+
[[maybe_unused]] bool R = takeAddressSpace(
12595+
C->getOperand(0)->getType()->getPointerAddressSpace());
12596+
assert(R && "the take should happen");
12597+
indicateOptimisticFixpoint();
12598+
return;
12599+
}
12600+
}
1257612601
}
1257712602

1257812603
ChangeStatus updateImpl(Attributor &A) override {
12579-
int32_t OldAddressSpace = AssumedAddressSpace;
12604+
uint32_t OldAddressSpace = AssumedAddressSpace;
1258012605
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
1258112606
DepClassTy::REQUIRED);
1258212607
auto Pred = [&](Value &Obj) {
1258312608
if (isa<UndefValue>(&Obj))
1258412609
return true;
12610+
// If an argument in generic address space has addrspace cast uses, and
12611+
// those casts are same, then we take the dst addrspace.
12612+
if (auto *Arg = dyn_cast<Argument>(&Obj)) {
12613+
if (Arg->getType()->getPointerAddressSpace() == 0) {
12614+
unsigned CastAddrSpace = 0;
12615+
for (auto *U : Arg->users()) {
12616+
auto *ASCI = dyn_cast<AddrSpaceCastInst>(U);
12617+
if (!ASCI)
12618+
continue;
12619+
if (CastAddrSpace && CastAddrSpace != ASCI->getDestAddressSpace())
12620+
return false;
12621+
CastAddrSpace = ASCI->getDestAddressSpace();
12622+
}
12623+
if (CastAddrSpace)
12624+
return takeAddressSpace(CastAddrSpace);
12625+
}
12626+
}
1258512627
return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
1258612628
};
1258712629

@@ -12594,19 +12636,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259412636

1259512637
/// See AbstractAttribute::manifest(...).
1259612638
ChangeStatus manifest(Attributor &A) override {
12597-
Value *AssociatedValue = &getAssociatedValue();
12598-
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
12599-
if (getAddressSpace() == NoAddressSpace ||
12600-
static_cast<uint32_t>(getAddressSpace()) ==
12601-
getAssociatedType()->getPointerAddressSpace())
12639+
unsigned NewAS = getAddressSpace();
12640+
if (NewAS == NoAddressSpace ||
12641+
NewAS == getAssociatedType()->getPointerAddressSpace())
1260212642
return ChangeStatus::UNCHANGED;
1260312643

12604-
PointerType *NewPtrTy =
12605-
PointerType::get(getAssociatedType()->getContext(),
12606-
static_cast<uint32_t>(getAddressSpace()));
12644+
Value *AssociatedValue = &getAssociatedValue();
12645+
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
1260712646
bool UseOriginalValue =
12608-
OriginalValue->getType()->getPointerAddressSpace() ==
12609-
static_cast<uint32_t>(getAddressSpace());
12647+
OriginalValue->getType()->getPointerAddressSpace() == NewAS;
12648+
PointerType *NewPtrTy =
12649+
PointerType::get(getAssociatedType()->getContext(), NewAS);
1261012650

1261112651
bool Changed = false;
1261212652

@@ -12656,9 +12696,9 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1265612696
}
1265712697

1265812698
private:
12659-
int32_t AssumedAddressSpace = NoAddressSpace;
12699+
uint32_t AssumedAddressSpace = NoAddressSpace;
1266012700

12661-
bool takeAddressSpace(int32_t AS) {
12701+
bool takeAddressSpace(uint32_t AS) {
1266212702
if (AssumedAddressSpace == NoAddressSpace) {
1266312703
AssumedAddressSpace = AS;
1266412704
return true;
@@ -12667,11 +12707,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1266712707
}
1266812708

1266912709
static Value *peelAddrspacecast(Value *V) {
12670-
if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
12671-
return peelAddrspacecast(I->getPointerOperand());
12710+
if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
12711+
assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
12712+
return I->getPointerOperand();
12713+
}
1267212714
if (auto *C = dyn_cast<ConstantExpr>(V))
12673-
if (C->getOpcode() == Instruction::AddrSpaceCast)
12674-
return peelAddrspacecast(C->getOperand(0));
12715+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12716+
assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
12717+
"there should not be AS 0 -> AS X");
12718+
return C->getOperand(0);
12719+
}
1267512720
return V;
1267612721
}
1267712722
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,38 @@ define void @foo(ptr addrspace(3) %val) {
243243
ret void
244244
}
245245

246+
define void @kernel_argument_promotion_pattern_intra_procedure(ptr %p, i32 %val) {
247+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_intra_procedure(
248+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
251+
; CHECK-NEXT: ret void
252+
;
253+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
254+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
255+
store i32 %val, ptr %p.cast.1
256+
ret void
257+
}
258+
259+
define internal void @use_kernel_argument_after_promotion(ptr %p, i32 %val) {
260+
; CHECK-LABEL: define internal void @use_kernel_argument_after_promotion(
261+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
262+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
263+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[TMP1]], align 4
264+
; CHECK-NEXT: ret void
265+
;
266+
store i32 %val, ptr %p
267+
ret void
268+
}
269+
270+
define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val) {
271+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern_inter_procedure(
272+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
273+
; CHECK-NEXT: call void @use_kernel_argument_after_promotion(ptr [[P]], i32 [[VAL]])
274+
; CHECK-NEXT: ret void
275+
;
276+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
277+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
278+
call void @use_kernel_argument_after_promotion(ptr %p.cast.1, i32 %val)
279+
ret void
280+
}

llvm/test/CodeGen/AMDGPU/addrspacecast.ll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
215215
}
216216

217217
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
218+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
219+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
220+
221+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
218222

219223
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
220-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
221224
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
222-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
225+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
223226
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
224227
%cast = addrspacecast ptr addrspace(3) null to ptr
225228
store volatile i32 7, ptr %cast
@@ -259,10 +262,14 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
259262

260263
; FIXME: Shouldn't need to enable queue ptr
261264
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
265+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
266+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
267+
268+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
269+
262270
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
263-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
264271
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
265-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
272+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
266273
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
267274
%cast = addrspacecast ptr addrspace(5) null to ptr
268275
store volatile i32 7, ptr %cast

0 commit comments

Comments
 (0)