9 changes: 4 additions & 5 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1675,8 +1675,8 @@ class TargetLoweringBase {
/// operations except for the pointer size. If AllowUnknown is true, this
/// will return MVT::Other for types with no EVT counterpart (e.g. structs),
/// otherwise it will assert.
EVT getValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
virtual EVT getValueType(const DataLayout &DL, Type *Ty,
Contributor:
I'd prefer to directly handle this in getValueType, rather than adding yet another target knob for really low level implementation details. Is this used in any meaningful way? Can we just return an MVT::Other or something in the unhandled case?
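
A minimal sketch of that alternative, i.e. handling the vector-of-fat-pointer case directly in TargetLoweringBase::getValueType instead of adding a virtual hook. The widening mirrors what this patch does in SITargetLowering below; the exact placement and the MVT::Other fallback are only illustrative assumptions, not part of the patch.

// Hypothetical sketch, not the patch itself: add this to
// TargetLoweringBase::getValueType after its existing scalar-pointer case.
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
  if (auto *PTy = dyn_cast<PointerType>(VTy->getElementType())) {
    MVT PtrTy = getPointerTy(DL, PTy->getAddressSpace());
    if (PtrTy.isVector()) {
      // The pointer's register type is itself a vector (e.g. AMDGPU maps
      // ptr addrspace(7) to v5i32), so <N x PtrTy> is not a valid EVT.
      // Either flatten it into one wide vector of the scalar element...
      return EVT::getVectorVT(Ty->getContext(),
                              EVT(PtrTy.getVectorElementType()),
                              VTy->getElementCount() *
                                  PtrTy.getVectorNumElements());
      // ...or, per the suggestion above, return MVT::Other when AllowUnknown
      // is set rather than asserting.
    }
  }
}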

Contributor Author:
I've added some handling to getValueType()

And ... I'm not sure. The getValueType() call came in from the loop vectorizer wanting to know how many registers would be needed to hold some value, and that led to the crash. It didn't help that, per comments on the last workaround of this sort I needed, there are a lot of pre-codegen uses of MVT getPointerTy(unsigned AS) that needed to be worked around with MVT::v5i32.
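
For reference, a rough sketch of the kind of query being described; the helper name and call site are hypothetical, and it assumes a cost-model-style caller asking how many registers a <2 x ptr addrspace(7)> value needs, which is what ends up in getValueType().

// Illustrative only: a pre-codegen pass (think of the loop vectorizer's cost
// model) asking how many registers a vector of buffer fat pointers occupies.
// Before this patch, the getValueType() call asserted because the element
// type lowers to v5i32; with it, the query sees a flattened v10i32.
static unsigned regsForBufferFatPtrVector(const TargetLowering &TLI,
                                          const DataLayout &DL,
                                          LLVMContext &Ctx) {
  Type *Ty = FixedVectorType::get(PointerType::get(Ctx, /*AddrSpace=*/7), 2);
  EVT VT = TLI.getValueType(DL, Ty);   // v10i32 rather than an assertion
  return TLI.getNumRegisters(Ctx, VT);
}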

bool AllowUnknown = false) const {
// Lower scalar pointers to native pointer types.
if (auto *PTy = dyn_cast<PointerType>(Ty))
return getPointerTy(DL, PTy->getAddressSpace());
@@ -1695,8 +1695,8 @@ class TargetLoweringBase {
return EVT::getEVT(Ty, AllowUnknown);
}

EVT getMemValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
virtual EVT getMemValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
// Lower scalar pointers to native pointer types.
if (auto *PTy = dyn_cast<PointerType>(Ty))
return getPointerMemTy(DL, PTy->getAddressSpace());
@@ -1714,7 +1714,6 @@
return getValueType(DL, Ty, AllowUnknown);
}


/// Return the MVT corresponding to this LLVM type. See getValueType.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
35 changes: 35 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1203,6 +1203,41 @@ MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
}

/// Passes like the loop vectorizer will, for example, try to query the size in
/// registers of a buffer fat pointer. Such pointers no longer exist by the
/// time we reach codegen, but these queries can still come in. Unfortunately,
/// something like <2 x ptr addrspace(7)> would get lowered to <2 x v5i32> by
/// the workarounds above, which is not a valid type and causes a crash.
/// Handle this case here by flattening it into one wide vector.
EVT SITargetLowering::getValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown) const {
if (auto *VT = dyn_cast<VectorType>(Ty)) {
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
MVT MET = getPointerTy(DL, PT->getAddressSpace());
if (MET.isVector() && MET.getVectorElementType() == MVT::i32) {
return EVT::getVectorVT(
Ty->getContext(), EVT(MET.getVectorElementType()),
VT->getElementCount() * MET.getVectorNumElements());
}
}
}
return AMDGPUTargetLowering::getValueType(DL, Ty, AllowUnknown);
}

EVT SITargetLowering::getMemValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown) const {
if (auto *VT = dyn_cast<VectorType>(Ty)) {
if (auto *PT = dyn_cast<PointerType>(VT->getElementType())) {
MVT ScalarTy = getPointerMemTy(DL, PT->getAddressSpace());
if (ScalarTy.isVector() && ScalarTy.getVectorElementType() == MVT::i32) {
return EVT::getVectorVT(
Ty->getContext(), EVT(ScalarTy.getVectorElementType()),
VT->getElementCount() * ScalarTy.getVectorNumElements());
}
}
}
return AMDGPUTargetLowering::getMemValueType(DL, Ty, AllowUnknown);
}

bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -306,6 +306,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
// so, to work around the lack of i160, map it to v5i32.
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
EVT getValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;
EVT getMemValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;

bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
MachineFunction &MF,
39 changes: 39 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll
@@ -0,0 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s

; Reduced from a crash, variables added to make things more realistic.
; This is a roundabout test for TargetLowering::getValueType() returning
; a reasonable value for <N x p7> instead of asserting.
define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %0) {
; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[_LR_PH5:.*:]]
; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
; CHECK-NEXT: br label %[[BB2:.*]]
; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[DOTLR_PH5:%.*]] ], [ [[TMP5:%.*]], %[[BB2]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], [[DOT_CRIT_EDGE_LOOPEXIT:label %.*]], label %[[BB2]]
; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
; CHECK-NEXT: ret void
;
.lr.ph5:
%.rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
%1 = addrspacecast ptr addrspace(8) %.rsrc to ptr addrspace(7)
br label %2

2: ; preds = %2, %.lr.ph5
%3 = phi i64 [ 0, %.lr.ph5 ], [ %5, %2 ]
%4 = getelementptr i32, ptr addrspace(7) %1, i32 0
%5 = add i64 %3, 1
%exitcond.not = icmp eq i64 %3, %0
br i1 %exitcond.not, label %._crit_edge.loopexit, label %2

._crit_edge.loopexit: ; preds = %2
ret void
}

declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)