Skip to content

Commit 7499a48

Browse files
author
Leon Clark
committed
Move transform to VectorCombine and update tests.
1 parent 6f6fc11 commit 7499a48

File tree

6 files changed

+116
-112
lines changed

6 files changed

+116
-112
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 0 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -987,94 +987,6 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
987987
return true;
988988
}
989989

990-
// If `I` is a load instruction, used only by shufflevector instructions with
991-
// poison values, attempt to shrink the load to only the lanes being used.
992-
static bool shrinkLoadsForBroadcast(Instruction &I) {
993-
auto *OldLoad = dyn_cast<LoadInst>(&I);
994-
if (!OldLoad || !OldLoad->isSimple())
995-
return false;
996-
997-
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
998-
if (!VecTy)
999-
return false;
1000-
1001-
auto IsPoisonOrUndef = [](Value *V) -> bool {
1002-
if (auto *C = dyn_cast<Constant>(V)) {
1003-
return isa<PoisonValue>(C) || isa<UndefValue>(C);
1004-
}
1005-
return false;
1006-
};
1007-
1008-
using IndexRange = std::pair<unsigned, unsigned>;
1009-
auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
1010-
auto OutputRange = IndexRange(VecTy->getNumElements(), 0u);
1011-
for (auto &Use : I.uses()) {
1012-
// All uses must be ShuffleVector instructions.
1013-
auto *Shuffle = dyn_cast<ShuffleVectorInst>(Use.getUser());
1014-
if (!Shuffle)
1015-
return {};
1016-
1017-
// Get index range for value.
1018-
auto *Op0 = Shuffle->getOperand(0u);
1019-
auto *Op1 = Shuffle->getOperand(1u);
1020-
if (!IsPoisonOrUndef(Op1))
1021-
return {};
1022-
1023-
// Find the min and max indices used by the ShuffleVector instruction.
1024-
auto Mask = Shuffle->getShuffleMask();
1025-
auto *Op0Ty = cast<FixedVectorType>(Op0->getType());
1026-
auto NumElems = Op0Ty->getNumElements();
1027-
1028-
for (unsigned Index : Mask) {
1029-
if (Index < NumElems) {
1030-
OutputRange.first = std::min(Index, OutputRange.first);
1031-
OutputRange.second = std::max(Index, OutputRange.second);
1032-
}
1033-
}
1034-
}
1035-
return OutputRange;
1036-
};
1037-
1038-
if (auto Indices = GetIndexRangeInShuffles()) {
1039-
auto OldSize = VecTy->getNumElements();
1040-
auto NewSize = Indices->second + 1u;
1041-
1042-
if (NewSize < OldSize) {
1043-
auto Builder = IRBuilder(&I);
1044-
Builder.SetCurrentDebugLocation(I.getDebugLoc());
1045-
1046-
// Create new load of smaller vector.
1047-
auto *ElemTy = VecTy->getElementType();
1048-
auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize);
1049-
auto *NewLoad = cast<LoadInst>(
1050-
Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
1051-
NewLoad->copyMetadata(I);
1052-
1053-
// Replace all users.
1054-
auto OldShuffles = SmallVector<ShuffleVectorInst *, 4u>{};
1055-
for (auto &Use : I.uses()) {
1056-
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
1057-
1058-
Builder.SetInsertPoint(Shuffle);
1059-
Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
1060-
auto *NewShuffle = Builder.CreateShuffleVector(
1061-
NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask());
1062-
1063-
Shuffle->replaceAllUsesWith(NewShuffle);
1064-
OldShuffles.push_back(Shuffle);
1065-
}
1066-
1067-
// Erase old users.
1068-
for (auto *Shuffle : OldShuffles)
1069-
Shuffle->eraseFromParent();
1070-
1071-
I.eraseFromParent();
1072-
return true;
1073-
}
1074-
}
1075-
return false;
1076-
}
1077-
1078990
namespace {
1079991
class StrNCmpInliner {
1080992
public:
@@ -1413,7 +1325,6 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
14131325
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
14141326
MadeChange |= foldPatternedLoads(I, DL);
14151327
MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1416-
MadeChange |= shrinkLoadsForBroadcast(I);
14171328
// NOTE: This function introduces erasing of the instruction `I`, so it
14181329
// needs to be called at the end of this sequence, otherwise we may make
14191330
// bugs.

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ class VectorCombine {
137137
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
138138
bool foldInterleaveIntrinsics(Instruction &I);
139139
bool shrinkType(Instruction &I);
140+
bool shrinkLoadForShuffles(Instruction &I);
140141

141142
void replaceValue(Value &Old, Value &New) {
142143
LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
@@ -3691,6 +3692,101 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
36913692
return true;
36923693
}
36933694

3695+
// If `I` is a load instruction, used only by shufflevector instructions with
3696+
// poison values, attempt to shrink the load to only the lanes being used.
3697+
bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3698+
auto *OldLoad = dyn_cast<LoadInst>(&I);
3699+
if (!OldLoad || !OldLoad->isSimple())
3700+
return false;
3701+
3702+
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
3703+
if (!VecTy)
3704+
return false;
3705+
3706+
auto IsPoisonOrUndef = [](Value *V) -> bool {
3707+
if (auto *C = dyn_cast<Constant>(V)) {
3708+
return isa<PoisonValue>(C) || isa<UndefValue>(C);
3709+
}
3710+
return false;
3711+
};
3712+
3713+
using IndexRange = std::pair<int, int>;
3714+
auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
3715+
auto OutputRange = IndexRange(VecTy->getNumElements(), -1);
3716+
for (auto &Use : I.uses()) {
3717+
// All uses must be ShuffleVector instructions.
3718+
auto *Shuffle = dyn_cast<ShuffleVectorInst>(Use.getUser());
3719+
if (!Shuffle)
3720+
return {};
3721+
3722+
// Get index range for value.
3723+
auto *Op0 = Shuffle->getOperand(0u);
3724+
auto *Op1 = Shuffle->getOperand(1u);
3725+
if (!IsPoisonOrUndef(Op1))
3726+
return {};
3727+
3728+
// Find the min and max indices used by the ShuffleVector instruction.
3729+
auto Mask = Shuffle->getShuffleMask();
3730+
auto *Op0Ty = cast<FixedVectorType>(Op0->getType());
3731+
auto NumElems = int(Op0Ty->getNumElements());
3732+
3733+
for (auto Index : Mask) {
3734+
if (Index >= 0 && Index < NumElems) {
3735+
OutputRange.first = std::min(Index, OutputRange.first);
3736+
OutputRange.second = std::max(Index, OutputRange.second);
3737+
}
3738+
}
3739+
3740+
if (OutputRange.second < OutputRange.first)
3741+
return {};
3742+
}
3743+
return OutputRange;
3744+
};
3745+
3746+
if (auto Indices = GetIndexRangeInShuffles()) {
3747+
auto OldSize = VecTy->getNumElements();
3748+
auto NewSize = Indices->second + 1u;
3749+
3750+
if (NewSize < OldSize) {
3751+
auto Builder = IRBuilder(&I);
3752+
Builder.SetCurrentDebugLocation(I.getDebugLoc());
3753+
3754+
// Create new load of smaller vector.
3755+
auto *ElemTy = VecTy->getElementType();
3756+
auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize);
3757+
auto *NewLoad = cast<LoadInst>(
3758+
Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
3759+
NewLoad->copyMetadata(I);
3760+
3761+
// Compare cost of old and new loads.
3762+
auto OldCost = TTI.getMemoryOpCost(
3763+
Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
3764+
OldLoad->getPointerAddressSpace(), CostKind);
3765+
auto NewCost = TTI.getMemoryOpCost(
3766+
Instruction::Load, NewLoad->getType(), NewLoad->getAlign(),
3767+
NewLoad->getPointerAddressSpace(), CostKind);
3768+
3769+
if (OldCost < NewCost || !NewCost.isValid())
3770+
return false;
3771+
3772+
// Replace all users.
3773+
for (auto &Use : I.uses()) {
3774+
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
3775+
3776+
Builder.SetInsertPoint(Shuffle);
3777+
Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
3778+
auto *NewShuffle = Builder.CreateShuffleVector(
3779+
NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask());
3780+
3781+
replaceValue(*Shuffle, *NewShuffle);
3782+
}
3783+
3784+
return true;
3785+
}
3786+
}
3787+
return false;
3788+
}
3789+
36943790
/// This is the entry point for all transforms. Pass manager differences are
36953791
/// handled in the callers of this function.
36963792
bool VectorCombine::run() {
@@ -3775,6 +3871,9 @@ bool VectorCombine::run() {
37753871
case Instruction::Xor:
37763872
MadeChange |= foldBitOpOfBitcasts(I);
37773873
break;
3874+
case Instruction::Load:
3875+
MadeChange |= shrinkLoadForShuffles(I);
3876+
break;
37783877
default:
37793878
MadeChange |= shrinkType(I);
37803879
break;

llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ $getAt = comdat any
1111

1212
define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
1313
; SSE-LABEL: @ConvertVectors_ByRef(
14-
; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
15-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
14+
; SSE-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
15+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
1616
; SSE-NEXT: ret <4 x float> [[TMP3]]
1717
;
1818
; AVX-LABEL: @ConvertVectors_ByRef(
19-
; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
20-
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
19+
; AVX-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[TMP0:%.*]], align 16
20+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
2121
; AVX-NEXT: ret <4 x float> [[TMP3]]
2222
;
2323
%2 = alloca ptr, align 8

llvm/test/Transforms/VectorCombine/X86/load-widening.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) {
336336

337337
define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) {
338338
; CHECK-LABEL: @load_v2i32_v4i32(
339-
; CHECK-NEXT: [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
339+
; CHECK-NEXT: [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
340340
; CHECK-NEXT: ret <4 x i32> [[S]]
341341
;
342342
%l = load <2 x i32>, ptr %p, align 1
@@ -443,8 +443,8 @@ define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize
443443

444444
define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address {
445445
; CHECK-LABEL: @load_v2i32_v4i32_asan(
446-
; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
447-
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
446+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
447+
; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
448448
; CHECK-NEXT: ret <4 x i32> [[S]]
449449
;
450450
%l = load <2 x i32>, ptr %p, align 1

llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,21 +47,12 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
4747
; broadcast loads are free on AVX (and blends are much cheaper than general 2-operand shuffles)
4848

4949
define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
50-
; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
51-
; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
52-
; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
53-
; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
54-
; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
55-
; SSE-NEXT: ret <4 x double> [[BLEND]]
56-
;
57-
; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
58-
; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
59-
; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
60-
; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
61-
; AVX-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
62-
; AVX-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
63-
; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
64-
; AVX-NEXT: ret <4 x double> [[BLEND]]
50+
; CHECK-LABEL: define <4 x double> @blend_broadcasts_v4f64(
51+
; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
52+
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x double>, ptr [[P0]], align 8
53+
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x double>, ptr [[P1]], align 8
54+
; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 1, i32 0>
55+
; CHECK-NEXT: ret <4 x double> [[BLEND]]
6556
;
6657
%ld0 = load <4 x double>, ptr %p0, align 32
6758
%ld1 = load <4 x double>, ptr %p1, align 32
@@ -81,3 +72,6 @@ define <2 x float> @PR86068(<2 x float> %a0, <2 x float> %a1) {
8172
%s2 = shufflevector <2 x float> %s1, <2 x float> %a0, <2 x i32> <i32 0, i32 3>
8273
ret <2 x float> %s2
8374
}
75+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
76+
; AVX: {{.*}}
77+
; SSE: {{.*}}

llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll renamed to llvm/test/Transforms/VectorCombine/load-shufflevector.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -passes=aggressive-instcombine -S < %s | FileCheck %s
2+
; RUN: opt -passes=vector-combine -S < %s | FileCheck %s
33

44
define <8 x half> @shuffle_v4_v8f16_r0_1_volatile(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
55
; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1_volatile(

0 commit comments

Comments
 (0)