Skip to content

Commit 24f04c8

Browse files
committed
Make TargetLowering::getValueType() virtual to fix <N x ptr(7)> crash
Even though ptr addrspace(7) is rewritten away into more primitive constructs before reaching SelectionDAG or GlobalISel, we still sometimes need to query properties like how many registers it will require. We already had an existing workaround that would map ptr addrspace(7) (and addrspace(9)) to MVT::v5i32 (and MVT::v6i32), their ultimate in-register representations, in overloads of TargetLowering::getPointerTy(DL, AddressSpace). However, whenever TargetLowering::getValueType() tried to construct a vector VT out of those vector MVTs, the vector constructor would assert because you can't have a vector of vectors. This commit solves the crash by manually overriding getValueType() and getMemValueType() in the AMDGPU TargetLowering. This is something of a big hammer, and I'm open to suggestions for a more precise change.
1 parent 9cc7ee1 commit 24f04c8

File tree

4 files changed

+82
-5
lines changed

4 files changed

+82
-5
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,8 +1675,8 @@ class TargetLoweringBase {
16751675
/// operations except for the pointer size. If AllowUnknown is true, this
16761676
/// will return MVT::Other for types with no EVT counterpart (e.g. structs),
16771677
/// otherwise it will assert.
1678-
EVT getValueType(const DataLayout &DL, Type *Ty,
1679-
bool AllowUnknown = false) const {
1678+
virtual EVT getValueType(const DataLayout &DL, Type *Ty,
1679+
bool AllowUnknown = false) const {
16801680
// Lower scalar pointers to native pointer types.
16811681
if (auto *PTy = dyn_cast<PointerType>(Ty))
16821682
return getPointerTy(DL, PTy->getAddressSpace());
@@ -1695,8 +1695,8 @@ class TargetLoweringBase {
16951695
return EVT::getEVT(Ty, AllowUnknown);
16961696
}
16971697

1698-
EVT getMemValueType(const DataLayout &DL, Type *Ty,
1699-
bool AllowUnknown = false) const {
1698+
virtual EVT getMemValueType(const DataLayout &DL, Type *Ty,
1699+
bool AllowUnknown = false) const {
17001700
// Lower scalar pointers to native pointer types.
17011701
if (auto *PTy = dyn_cast<PointerType>(Ty))
17021702
return getPointerMemTy(DL, PTy->getAddressSpace());
@@ -1714,7 +1714,6 @@ class TargetLoweringBase {
17141714
return getValueType(DL, Ty, AllowUnknown);
17151715
}
17161716

1717-
17181717
/// Return the MVT corresponding to this LLVM type. See getValueType.
17191718
MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
17201719
bool AllowUnknown = false) const {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,41 @@ MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
12031203
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
12041204
}
12051205

1206+
/// Passes like the loop vectorizer will, for example, try to query the size in
1207+
/// registers of buffer fat pointers. They don't exist by the time we reach
1208+
/// codegen, but these queries can still come in. Unfortunately, something like
1208+
/// codegen, but these queries can still come in. Unfortunately, something like
1209+
/// <2 x ptr addrspace(7)> will get lowered to <2 x v5i32> by the workarounds
1210+
/// above, which causes a crash. Handle this case here.
1211+
EVT SITargetLowering::getValueType(const DataLayout &DL, Type *Ty,
1212+
bool AllowUnknown) const {
1213+
if (auto *VT = dyn_cast<VectorType>(Ty)) {
1214+
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
1215+
MVT MET = getPointerTy(DL, PT->getAddressSpace());
1216+
if (MET.isVector() && MET.getVectorElementType() == MVT::i32) {
1217+
return EVT::getVectorVT(
1218+
Ty->getContext(), EVT(MET.getVectorElementType()),
1219+
VT->getElementCount() * MET.getVectorNumElements());
1220+
}
1221+
}
1222+
}
1223+
return AMDGPUTargetLowering::getValueType(DL, Ty, AllowUnknown);
1224+
}
1225+
1226+
EVT SITargetLowering::getMemValueType(const DataLayout &DL, Type *Ty,
1227+
bool AllowUnknown) const {
1228+
if (auto *VT = dyn_cast<VectorType>(Ty)) {
1229+
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
1230+
MVT ScalarTy = getPointerMemTy(DL, PT->getAddressSpace());
1231+
if (ScalarTy.isVector() && ScalarTy.getVectorElementType() == MVT::i32) {
1232+
return EVT::getVectorVT(
1233+
Ty->getContext(), EVT(ScalarTy.getVectorElementType()),
1234+
VT->getElementCount() * ScalarTy.getVectorNumElements());
1235+
}
1236+
}
1237+
}
1238+
return AMDGPUTargetLowering::getMemValueType(DL, Ty, AllowUnknown);
1239+
}
1240+
12061241
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12071242
const CallInst &CI,
12081243
MachineFunction &MF,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
306306
// so, to work around the lack of i160, map it to v5i32.
307307
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
308308
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
309+
EVT getValueType(const DataLayout &DL, Type *Ty,
310+
bool AllowUnknown = false) const override;
311+
EVT getMemValueType(const DataLayout &DL, Type *Ty,
312+
bool AllowUnknown = false) const override;
309313

310314
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
311315
MachineFunction &MF,
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s
3+
4+
; Reduced from a crash, variables added to make things more realistic.
5+
; This is a roundabout test for TargetLowering::getValueType() returning
6+
; a reasonable value for <N x p7> instead of asserting.
7+
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %0) {
8+
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
9+
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[_LR_PH5:.*:]]
11+
; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
12+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
13+
; CHECK-NEXT: br label %[[BB2:.*]]
14+
; CHECK: [[BB2]]:
15+
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[DOTLR_PH5:%.*]] ], [ [[TMP5:%.*]], %[[BB2]] ]
16+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
17+
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
18+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[TMP0]]
19+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], [[DOT_CRIT_EDGE_LOOPEXIT:label %.*]], label %[[BB2]]
20+
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
21+
; CHECK-NEXT: ret void
22+
;
23+
.lr.ph5:
24+
%.rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
25+
%1 = addrspacecast ptr addrspace(8) %.rsrc to ptr addrspace(7)
26+
br label %2
27+
28+
2: ; preds = %2, %.lr.ph5
29+
%3 = phi i64 [ 0, %.lr.ph5 ], [ %5, %2 ]
30+
%4 = getelementptr i32, ptr addrspace(7) %1, i32 0
31+
%5 = add i64 %3, 1
32+
%exitcond.not = icmp eq i64 %3, %0
33+
br i1 %exitcond.not, label %._crit_edge.loopexit, label %2
34+
35+
._crit_edge.loopexit: ; preds = %2
36+
ret void
37+
}
38+
39+
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)

0 commit comments

Comments
 (0)