Skip to content

Commit dbb5c4d

Browse files
committed
Rework PR per Matt
1 parent 24f04c8 commit dbb5c4d

File tree

4 files changed

+36
-59
lines changed

4 files changed

+36
-59
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,40 +1675,56 @@ class TargetLoweringBase {
16751675
/// operations except for the pointer size. If AllowUnknown is true, this
16761676
/// will return MVT::Other for types with no EVT counterpart (e.g. structs),
16771677
/// otherwise it will assert.
1678-
virtual EVT getValueType(const DataLayout &DL, Type *Ty,
1679-
bool AllowUnknown = false) const {
1678+
EVT getValueType(const DataLayout &DL, Type *Ty,
1679+
bool AllowUnknown = false) const {
16801680
// Lower scalar pointers to native pointer types.
16811681
if (auto *PTy = dyn_cast<PointerType>(Ty))
16821682
return getPointerTy(DL, PTy->getAddressSpace());
16831683

16841684
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
16851685
Type *EltTy = VTy->getElementType();
1686+
ElementCount EC = VTy->getElementCount();
16861687
// Lower vectors of pointers to native pointer types.
16871688
if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
16881689
EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1690+
// Kludge around AMDGPU's fat pointers which, while not lowered to
1691+
// codegen, still need an MVT, and can only use vectors because
1692+
// there are no big enough scalars. Therefore, flatten the nominal
1693+
// vector-of-vectors.
1694+
if (PointerTy.isVector()) {
1695+
EC = EC * PointerTy.getVectorNumElements();
1696+
PointerTy = PointerTy.getVectorElementType();
1697+
}
16891698
EltTy = PointerTy.getTypeForEVT(Ty->getContext());
16901699
}
1691-
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1692-
VTy->getElementCount());
1700+
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), EC);
16931701
}
16941702

16951703
return EVT::getEVT(Ty, AllowUnknown);
16961704
}
16971705

1698-
virtual EVT getMemValueType(const DataLayout &DL, Type *Ty,
1699-
bool AllowUnknown = false) const {
1706+
EVT getMemValueType(const DataLayout &DL, Type *Ty,
1707+
bool AllowUnknown = false) const {
17001708
// Lower scalar pointers to native pointer types.
17011709
if (auto *PTy = dyn_cast<PointerType>(Ty))
17021710
return getPointerMemTy(DL, PTy->getAddressSpace());
17031711

17041712
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
17051713
Type *EltTy = VTy->getElementType();
1714+
ElementCount EC = VTy->getElementCount();
17061715
if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
17071716
EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
1717+
// Kludge around AMDGPU's fat pointers which, while not lowered to
1718+
// codegen, still need an MVT, and can only use vectors because
1719+
// there are no big enough scalars. Therefore, flatten the nominal
1720+
// vector-of-vectors.
1721+
if (PointerTy.isVector()) {
1722+
EC = EC * PointerTy.getVectorNumElements();
1723+
PointerTy = PointerTy.getVectorElementType();
1724+
}
17081725
EltTy = PointerTy.getTypeForEVT(Ty->getContext());
17091726
}
1710-
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1711-
VTy->getElementCount());
1727+
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), EC);
17121728
}
17131729

17141730
return getValueType(DL, Ty, AllowUnknown);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,41 +1203,6 @@ MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
12031203
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
12041204
}
12051205

1206-
/// Passes like the loop vectorizer will, for example, try to query the size in
1207-
/// registers of buffer fat pointer. They don't exist by the time we reach
1208-
/// codegen, but these queries can still come in. Unfortunately, something like
1209-
/// <2 x ptr addrspace(7)> will get lowered to <2 x v5i32> by the workarounds
1210-
/// above, which causes a crash. Handle this case here.
1211-
EVT SITargetLowering::getValueType(const DataLayout &DL, Type *Ty,
1212-
bool AllowUnknown) const {
1213-
if (auto *VT = dyn_cast<VectorType>(Ty)) {
1214-
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
1215-
MVT MET = getPointerTy(DL, PT->getAddressSpace());
1216-
if (MET.isVector() && MET.getVectorElementType() == MVT::i32) {
1217-
return EVT::getVectorVT(
1218-
Ty->getContext(), EVT(MET.getVectorElementType()),
1219-
VT->getElementCount() * MET.getVectorNumElements());
1220-
}
1221-
}
1222-
}
1223-
return AMDGPUTargetLowering::getValueType(DL, Ty, AllowUnknown);
1224-
}
1225-
1226-
EVT SITargetLowering::getMemValueType(const DataLayout &DL, Type *Ty,
1227-
bool AllowUnknown) const {
1228-
if (auto *VT = dyn_cast<VectorType>(Ty)) {
1229-
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
1230-
MVT ScalarTy = getPointerMemTy(DL, PT->getAddressSpace());
1231-
if (ScalarTy.isVector() && ScalarTy.getVectorElementType() == MVT::i32) {
1232-
return EVT::getVectorVT(
1233-
Ty->getContext(), EVT(ScalarTy.getVectorElementType()),
1234-
VT->getElementCount() * ScalarTy.getVectorNumElements());
1235-
}
1236-
}
1237-
}
1238-
return AMDGPUTargetLowering::getValueType(DL, Ty, AllowUnknown);
1239-
}
1240-
12411206
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12421207
const CallInst &CI,
12431208
MachineFunction &MF,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,10 +306,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
306306
// so, to work around the lack of i160, map it to v5i32.
307307
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
308308
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
309-
EVT getValueType(const DataLayout &DL, Type *Ty,
310-
bool AllowUnknown = false) const override;
311-
EVT getMemValueType(const DataLayout &DL, Type *Ty,
312-
bool AllowUnknown = false) const override;
313309

314310
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
315311
MachineFunction &MF,

llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; Reduced from a crash, variables added to make things more realistic.
55
; This is a roundabout test for TargetLowering::getValueType() returning
66
; a reasonable value for <N x p7> instead of asserting.
7-
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %0) {
7+
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %v) {
88
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
99
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
1010
; CHECK-NEXT: [[_LR_PH5:.*:]]
@@ -20,19 +20,19 @@ define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspac
2020
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
2121
; CHECK-NEXT: ret void
2222
;
23-
.lr.ph5:
24-
%.rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
25-
%1 = addrspacecast ptr addrspace(8) %.rsrc to ptr addrspace(7)
26-
br label %2
23+
entry:
24+
%rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
25+
%fat = addrspacecast ptr addrspace(8) %rsrc to ptr addrspace(7)
26+
br label %loop
2727

28-
2: ; preds = %2, %.lr.ph5
29-
%3 = phi i64 [ 0, %.lr.ph5 ], [ %5, %2 ]
30-
%4 = getelementptr i32, ptr addrspace(7) %1, i32 0
31-
%5 = add i64 %3, 1
32-
%exitcond.not = icmp eq i64 %3, %0
33-
br i1 %exitcond.not, label %._crit_edge.loopexit, label %2
28+
loop: ; preds = %loop, %entry
29+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
30+
%ptr = getelementptr i32, ptr addrspace(7) %fat, i32 0
31+
%iv.next = add i64 %iv, 1
32+
%exitcond.not = icmp eq i64 %iv, %v
33+
br i1 %exitcond.not, label %exit, label %loop
3434

35-
._crit_edge.loopexit: ; preds = %2
35+
exit: ; preds = %loop
3636
ret void
3737
}
3838

0 commit comments

Comments
 (0)