Skip to content

Commit e168d2b

Browse files
committed
[Attributor] Take the address space from addrspacecast directly
If the value to be analyzed is directly from addrspacecast, we take the source address space directly. This is to improve the case where in `AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by insertting an addrspacecast directly from a generic pointer. However, during the analysis, the underlying object will be the generic pointer, instead of the addrspacecast, thus the inferred address space is the generic one, which is not ideal.
1 parent 8625eb0 commit e168d2b

File tree

4 files changed

+72
-25
lines changed

4 files changed

+72
-25
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6249,7 +6249,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
62496249
/// Return the address space of the associated value. \p NoAddressSpace is
62506250
/// returned if the associated value is dead. This functions is not supposed
62516251
/// to be called if the AA is invalid.
6252-
virtual int32_t getAddressSpace() const = 0;
6252+
virtual uint32_t getAddressSpace() const = 0;
62536253

62546254
/// Create an abstract attribute view for the position \p IRP.
62556255
static AAAddressSpace &createForPosition(const IRPosition &IRP,
@@ -6268,7 +6268,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
62686268
}
62696269

62706270
// No address space which indicates the associated value is dead.
6271-
static const int32_t NoAddressSpace = -1;
6271+
static const uint32_t NoAddressSpace = ~0U;
62726272

62736273
/// Unique ID (due to the unique address)
62746274
static const char ID;

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12562,7 +12562,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1256212562
AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A)
1256312563
: AAAddressSpace(IRP, A) {}
1256412564

12565-
int32_t getAddressSpace() const override {
12565+
uint32_t getAddressSpace() const override {
1256612566
assert(isValidState() && "the AA is invalid");
1256712567
return AssumedAddressSpace;
1256812568
}
@@ -12571,12 +12571,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1257112571
void initialize(Attributor &A) override {
1257212572
assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
1257312573
"Associated value is not a pointer");
12574-
if (getAssociatedType()->getPointerAddressSpace())
12574+
// If the pointer already has non-generic address space, we assume it is the
12575+
// correct one.
12576+
if (getAssociatedType()->getPointerAddressSpace()) {
12577+
[[maybe_unused]] bool R =
12578+
takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
12579+
assert(R && "the take should happen");
1257512580
indicateOptimisticFixpoint();
12581+
return;
12582+
}
12583+
// If the pointer is an addrspacecast, we assume the source address space is
12584+
// the correct one.
12585+
Value *V = &getAssociatedValue();
12586+
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(V)) {
12587+
[[maybe_unused]] bool R = takeAddressSpace(ASC->getSrcAddressSpace());
12588+
assert(R && "the take should happen");
12589+
indicateOptimisticFixpoint();
12590+
return;
12591+
}
12592+
if (auto *C = dyn_cast<ConstantExpr>(V)) {
12593+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12594+
[[maybe_unused]] bool R = takeAddressSpace(
12595+
C->getOperand(0)->getType()->getPointerAddressSpace());
12596+
assert(R && "the take should happen");
12597+
indicateOptimisticFixpoint();
12598+
return;
12599+
}
12600+
}
1257612601
}
1257712602

1257812603
ChangeStatus updateImpl(Attributor &A) override {
12579-
int32_t OldAddressSpace = AssumedAddressSpace;
12604+
uint32_t OldAddressSpace = AssumedAddressSpace;
1258012605
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
1258112606
DepClassTy::REQUIRED);
1258212607
auto Pred = [&](Value &Obj) {
@@ -12594,19 +12619,17 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1259412619

1259512620
/// See AbstractAttribute::manifest(...).
1259612621
ChangeStatus manifest(Attributor &A) override {
12597-
Value *AssociatedValue = &getAssociatedValue();
12598-
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
12599-
if (getAddressSpace() == NoAddressSpace ||
12600-
static_cast<uint32_t>(getAddressSpace()) ==
12601-
getAssociatedType()->getPointerAddressSpace())
12622+
unsigned NewAS = getAddressSpace();
12623+
if (NewAS == NoAddressSpace ||
12624+
NewAS == getAssociatedType()->getPointerAddressSpace())
1260212625
return ChangeStatus::UNCHANGED;
1260312626

12604-
PointerType *NewPtrTy =
12605-
PointerType::get(getAssociatedType()->getContext(),
12606-
static_cast<uint32_t>(getAddressSpace()));
12627+
Value *AssociatedValue = &getAssociatedValue();
12628+
Value *OriginalValue = peelAddrspacecast(AssociatedValue);
1260712629
bool UseOriginalValue =
12608-
OriginalValue->getType()->getPointerAddressSpace() ==
12609-
static_cast<uint32_t>(getAddressSpace());
12630+
OriginalValue->getType()->getPointerAddressSpace() == NewAS;
12631+
PointerType *NewPtrTy =
12632+
PointerType::get(getAssociatedType()->getContext(), NewAS);
1261012633

1261112634
bool Changed = false;
1261212635

@@ -12656,9 +12679,9 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1265612679
}
1265712680

1265812681
private:
12659-
int32_t AssumedAddressSpace = NoAddressSpace;
12682+
uint32_t AssumedAddressSpace = NoAddressSpace;
1266012683

12661-
bool takeAddressSpace(int32_t AS) {
12684+
bool takeAddressSpace(uint32_t AS) {
1266212685
if (AssumedAddressSpace == NoAddressSpace) {
1266312686
AssumedAddressSpace = AS;
1266412687
return true;
@@ -12667,11 +12690,16 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1266712690
}
1266812691

1266912692
static Value *peelAddrspacecast(Value *V) {
12670-
if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
12671-
return peelAddrspacecast(I->getPointerOperand());
12693+
if (auto *I = dyn_cast<AddrSpaceCastInst>(V)) {
12694+
assert(I->getSrcAddressSpace() && "there should not be AS 0 -> AS X");
12695+
return I->getPointerOperand();
12696+
}
1267212697
if (auto *C = dyn_cast<ConstantExpr>(V))
12673-
if (C->getOpcode() == Instruction::AddrSpaceCast)
12674-
return peelAddrspacecast(C->getOperand(0));
12698+
if (C->getOpcode() == Instruction::AddrSpaceCast) {
12699+
assert(C->getOperand(0)->getType()->getPointerAddressSpace() &&
12700+
"there should not be AS 0 -> AS X");
12701+
return C->getOperand(0);
12702+
}
1267512703
return V;
1267612704
}
1267712705
};

llvm/test/CodeGen/AMDGPU/aa-as-infer.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,15 @@ define void @foo(ptr addrspace(3) %val) {
243243
ret void
244244
}
245245

246+
define void @kernel_argument_promotion_pattern(ptr %p, i32 %val) {
247+
; CHECK-LABEL: define void @kernel_argument_promotion_pattern(
248+
; CHECK-SAME: ptr [[P:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
249+
; CHECK-NEXT: [[P_CAST_0:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
250+
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[P_CAST_0]], align 4
251+
; CHECK-NEXT: ret void
252+
;
253+
%p.cast.0 = addrspacecast ptr %p to ptr addrspace(1)
254+
%p.cast.1 = addrspacecast ptr addrspace(1) %p.cast.0 to ptr
255+
store i32 %val, ptr %p.cast.1
256+
ret void
257+
}

llvm/test/CodeGen/AMDGPU/addrspacecast.ll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
215215
}
216216

217217
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
218+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
219+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
220+
221+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
218222

219223
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
220-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
221224
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
222-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
225+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
223226
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
224227
%cast = addrspacecast ptr addrspace(3) null to ptr
225228
store volatile i32 7, ptr %cast
@@ -259,10 +262,14 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
259262

260263
; FIXME: Shouldn't need to enable queue ptr
261264
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
265+
; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
266+
; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
267+
268+
; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
269+
262270
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
263-
; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
264271
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
265-
; HSA: flat_store_dword v[[[LO]]:[[HI]]], v[[K]]
272+
; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
266273
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
267274
%cast = addrspacecast ptr addrspace(5) null to ptr
268275
store volatile i32 7, ptr %cast

0 commit comments

Comments
 (0)