Skip to content

Commit 70825eb

Browse files
committed
Address review comments
1 parent cceac3b commit 70825eb

File tree

2 files changed

+27
-30
lines changed

2 files changed

+27
-30
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ namespace {
605605
/// 1. Recursing into structs (and arrays that don't share a memory layout with
606606
/// vectors) since the intrinsics can't handle complex types.
607607
/// 2. Converting arrays of non-aggregate, byte-sized types into their
608-
/// correspondinng vectors
608+
/// corresponding vectors
609609
/// 3. Bitcasting unsupported types, namely overly-long scalars and byte
610610
/// vectors, into vectors of supported types.
611611
/// 4. Splitting up excessively long reads/writes into multiple operations.
@@ -727,10 +727,7 @@ Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
727727
// Implicitly zero-extend to the next byte if needed
728728
if (!DL.typeSizeEqualsStoreSize(T))
729729
T = IRB.getIntNTy(Size.getFixedValue());
730-
Type *ElemTy = T;
731-
if (auto *VT = dyn_cast<FixedVectorType>(T)) {
732-
ElemTy = VT->getElementType();
733-
}
730+
Type *ElemTy = T->getScalarType();
734731
if (isa<PointerType, ScalableVectorType>(ElemTy))
735732
// Pointers are always big enough, and we'll let scalable vectors through to
736733
// fail in codegen.
@@ -758,11 +755,11 @@ Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
758755
Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
759756
Value *V, Type *TargetType, const Twine &Name) {
760757
Type *SourceType = V->getType();
761-
if (DL.getTypeSizeInBits(SourceType) != DL.getTypeSizeInBits(TargetType)) {
762-
Type *ShortScalarTy =
763-
IRB.getIntNTy(DL.getTypeSizeInBits(SourceType).getFixedValue());
764-
Type *ByteScalarTy =
765-
IRB.getIntNTy(DL.getTypeSizeInBits(TargetType).getFixedValue());
758+
TypeSize SourceSize = DL.getTypeSizeInBits(SourceType);
759+
TypeSize TargetSize = DL.getTypeSizeInBits(TargetType);
760+
if (SourceSize != TargetSize) {
761+
Type *ShortScalarTy = IRB.getIntNTy(SourceSize.getFixedValue());
762+
Type *ByteScalarTy = IRB.getIntNTy(TargetSize.getFixedValue());
766763
Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name + ".as.scalar");
767764
Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name + ".zext");
768765
V = Zext;
@@ -774,11 +771,11 @@ Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
774771
Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(
775772
Value *V, Type *OrigType, const Twine &Name) {
776773
Type *LegalType = V->getType();
777-
if (DL.getTypeSizeInBits(LegalType) != DL.getTypeSizeInBits(OrigType)) {
778-
Type *ShortScalarTy =
779-
IRB.getIntNTy(DL.getTypeSizeInBits(OrigType).getFixedValue());
780-
Type *ByteScalarTy =
781-
IRB.getIntNTy(DL.getTypeSizeInBits(LegalType).getFixedValue());
774+
TypeSize LegalSize = DL.getTypeSizeInBits(LegalType);
775+
TypeSize OrigSize = DL.getTypeSizeInBits(OrigType);
776+
if (LegalSize != OrigSize) {
777+
Type *ShortScalarTy = IRB.getIntNTy(OrigSize.getFixedValue());
778+
Type *ByteScalarTy = IRB.getIntNTy(LegalSize.getFixedValue());
782779
Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name + ".bytes.cast");
783780
Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name + ".trunc");
784781
return IRB.CreateBitCast(Trunc, OrigType, Name + ".orig");
@@ -791,6 +788,8 @@ Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(Type *LegalType) {
791788
if (!VT)
792789
return LegalType;
793790
Type *ET = VT->getElementType();
791+
// Explicitly return the element type of 1-element vectors because the
792+
// underlying intrinsics don't like <1 x T> even though it's a synonym for T.
794793
if (VT->getNumElements() == 1)
795794
return ET;
796795
if (DL.getTypeSizeInBits(LegalType) == 96 && DL.getTypeSizeInBits(ET) < 32)
@@ -917,9 +916,8 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
917916
if (auto *AT = dyn_cast<ArrayType>(PartType)) {
918917
Type *ElemTy = AT->getElementType();
919918
TypeSize AllocSize = DL.getTypeAllocSize(ElemTy);
920-
if (!(ElemTy->isSingleValueType() &&
921-
DL.getTypeSizeInBits(ElemTy) == 8 * AllocSize &&
922-
!ElemTy->isVectorTy())) {
919+
if (!ElemTy->isSingleValueType() ||
920+
DL.getTypeSizeInBits(ElemTy) != 8 * AllocSize || ElemTy->isVectorTy()) {
923921
bool Changed = false;
924922
for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
925923
/*Inclusive=*/false)) {
@@ -963,9 +961,7 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
963961
// type will be a vector (ex. an i256 load will have LegalType = <8 x i32>).
964962
// But if we're already a scalar (which can happen if we're splitting up a
965963
// struct), the element type will be the legal type itself.
966-
Type *ElemType = LegalType;
967-
if (auto *VT = dyn_cast<FixedVectorType>(LegalType))
968-
ElemType = VT->getElementType();
964+
Type *ElemType = LegalType->getScalarType();
969965
unsigned ElemBytes = DL.getTypeStoreSize(ElemType);
970966
AAMDNodes AANodes = OrigLI.getAAMetadata();
971967
if (IsAggPart && Slices.empty())
@@ -986,10 +982,8 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
986982
copyMetadataForLoad(*NewLI, OrigLI);
987983
NewLI->setAAMetadata(
988984
AANodes.adjustForAccess(ByteOffset, LoadableType, DL));
989-
if (OrigLI.isAtomic())
990-
NewLI->setAtomic(OrigLI.getOrdering(), OrigLI.getSyncScopeID());
991-
if (OrigLI.isVolatile())
992-
NewLI->setVolatile(OrigLI.isVolatile());
985+
NewLI->setAtomic(OrigLI.getOrdering(), OrigLI.getSyncScopeID());
986+
NewLI->setVolatile(OrigLI.isVolatile());
993987
Value *Loaded = IRB.CreateBitCast(NewLI, SliceType,
994988
NewLI->getName() + ".from.loadable");
995989
LoadsRes = insertSlice(LoadsRes, Loaded, S, Name);
@@ -1042,9 +1036,8 @@ std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
10421036
if (auto *AT = dyn_cast<ArrayType>(PartType)) {
10431037
Type *ElemTy = AT->getElementType();
10441038
TypeSize AllocSize = DL.getTypeAllocSize(ElemTy);
1045-
if (!(ElemTy->isSingleValueType() &&
1046-
DL.getTypeSizeInBits(ElemTy) == 8 * AllocSize &&
1047-
!ElemTy->isVectorTy())) {
1039+
if (!ElemTy->isSingleValueType() ||
1040+
DL.getTypeSizeInBits(ElemTy) != 8 * AllocSize || ElemTy->isVectorTy()) {
10481041
bool Changed = false;
10491042
for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
10501043
/*Inclusive=*/false)) {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.nxv2i32.fail.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 < %s
2-
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s
1+
; Note: The exact error messages aren't important here, but are included to catch
2+
; anything changing.
3+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s 2>&1 \
4+
; RUN: | grep "LLVM ERROR: Scalarization of scalable vectors is not supported."
5+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s 2>&1 \
6+
; RUN: | grep "LLVM ERROR: Invalid size request on a scalable vector."
37
define void @buffer_store_nxv2i32(ptr addrspace(8) inreg %rsrc, i32 %offset) {
48
call void @llvm.amdgcn.raw.ptr.buffer.store.nxv2i32(<vscale x 2 x i32> poison, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
59
ret void

0 commit comments

Comments
 (0)