Skip to content

Commit bcc7fb0

Browse files
committed
[VectorCombine] Allow shuffling between vectors of the same element type but different numbers of elements
The `foldInsExtVectorToShuffle` function folds an extractelement from one vector followed by an insertelement into another vector into a single shuffle. Previously, this fold was only supported when the source and destination vectors had exactly the same type. This commit also allows the fold when the two vectors have the same element type but different numbers of elements, by first emitting a length-changing shuffle that resizes the source vector to the destination vector's length. Proof: https://alive2.llvm.org/ce/z/ELNLr7 Fixes #120772
1 parent 7e34721 commit bcc7fb0

File tree

5 files changed

+132
-70
lines changed

5 files changed

+132
-70
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3018,24 +3018,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30183018
return false;
30193019

30203020
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
3021-
if (!VecTy || SrcVec->getType() != VecTy)
3021+
auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType());
3022+
// We can try combining vectors with different numbers of elements, as long as
// the element types match.
3023+
if (!VecTy || !SrcVecTy ||
3024+
SrcVecTy->getElementType() != VecTy->getElementType())
30223025
return false;
30233026

30243027
unsigned NumElts = VecTy->getNumElements();
3025-
if (ExtIdx >= NumElts || InsIdx >= NumElts)
3028+
unsigned NumSrcElts = SrcVecTy->getNumElements();
3029+
if (InsIdx >= NumElts || NumElts == 1)
30263030
return false;
30273031

30283032
// Insertion into poison is a cheaper single operand shuffle.
30293033
TargetTransformInfo::ShuffleKind SK;
30303034
SmallVector<int> Mask(NumElts, PoisonMaskElem);
3031-
if (isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
3035+
3036+
bool NeedExpOrNarrow = NumSrcElts != NumElts;
3037+
bool NeedDstSrcSwap = isa<PoisonValue>(DstVec) && !isa<UndefValue>(SrcVec);
3038+
if (NeedDstSrcSwap) {
30323039
SK = TargetTransformInfo::SK_PermuteSingleSrc;
3033-
Mask[InsIdx] = ExtIdx;
3040+
if (!NeedExpOrNarrow)
3041+
Mask[InsIdx] = ExtIdx;
3042+
else
3043+
Mask[InsIdx] = 0;
30343044
std::swap(DstVec, SrcVec);
30353045
} else {
30363046
SK = TargetTransformInfo::SK_PermuteTwoSrc;
30373047
std::iota(Mask.begin(), Mask.end(), 0);
3038-
Mask[InsIdx] = ExtIdx + NumElts;
3048+
if (!NeedExpOrNarrow)
3049+
Mask[InsIdx] = ExtIdx + NumElts;
3050+
else
3051+
Mask[InsIdx] = NumElts;
30393052
}
30403053

30413054
// Cost
@@ -3047,8 +3060,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30473060
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
30483061
InstructionCost OldCost = ExtCost + InsCost;
30493062

3050-
InstructionCost NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0,
3051-
nullptr, {DstVec, SrcVec});
3063+
InstructionCost NewCost = 0;
3064+
SmallVector<int> ExtToVecMask;
3065+
if (!NeedExpOrNarrow) {
3066+
NewCost = TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr,
3067+
{DstVec, SrcVec});
3068+
} else {
3069+
// When creating length-changing-vector, always create with a Mask whose
3070+
// first element has an ExtIdx, so that the first element of the vector
3071+
// being created is always the target to be extracted.
3072+
ExtToVecMask.assign(NumElts, PoisonMaskElem);
3073+
ExtToVecMask[0] = ExtIdx;
3074+
// Add cost for expanding or narrowing
3075+
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
3076+
VecTy, ExtToVecMask, CostKind);
3077+
NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind);
3078+
}
3079+
30523080
if (!Ext->hasOneUse())
30533081
NewCost += ExtCost;
30543082

@@ -3059,6 +3087,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
30593087
if (OldCost < NewCost)
30603088
return false;
30613089

3090+
if (NeedExpOrNarrow) {
3091+
if (!NeedDstSrcSwap)
3092+
SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask);
3093+
else
3094+
DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask);
3095+
}
3096+
30623097
// Canonicalize undef param to RHS to help further folds.
30633098
if (isa<UndefValue>(DstVec) && !isa<UndefValue>(SrcVec)) {
30643099
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);

llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,31 @@ define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
2626
}
2727

2828
define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
29-
; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64(
30-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
31-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
32-
; CHECK-NEXT: ret <4 x double> [[INS]]
29+
; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64(
30+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
31+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
32+
; SSE-NEXT: ret <4 x double> [[INS]]
33+
;
34+
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
35+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
36+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
37+
; AVX-NEXT: ret <4 x double> [[INS]]
3338
;
3439
%ext = extractelement <2 x double> %b, i32 0
3540
%ins = insertelement <4 x double> poison, double %ext, i32 2
3641
ret <4 x double> %ins
3742
}
3843

3944
define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 {
40-
; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64(
41-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
42-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
43-
; CHECK-NEXT: ret <4 x double> [[INS]]
45+
; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64(
46+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0
47+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
48+
; SSE-NEXT: ret <4 x double> [[INS]]
49+
;
50+
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
51+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
52+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
53+
; AVX-NEXT: ret <4 x double> [[INS]]
4454
;
4555
%ext = extractelement <2 x double> %b, i32 0
4656
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -60,8 +70,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
6070

6171
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
6272
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
63-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
64-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1
73+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
74+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
6575
; CHECK-NEXT: ret <4 x double> [[INS]]
6676
;
6777
%ext = extractelement <2 x double> %b, i32 1
@@ -70,21 +80,31 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
7080
}
7181

7282
define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
73-
; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64(
74-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
75-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
76-
; CHECK-NEXT: ret <4 x double> [[INS]]
83+
; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64(
84+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
85+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2
86+
; SSE-NEXT: ret <4 x double> [[INS]]
87+
;
88+
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
89+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
90+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
91+
; AVX-NEXT: ret <4 x double> [[INS]]
7792
;
7893
%ext = extractelement <2 x double> %b, i32 1
7994
%ins = insertelement <4 x double> poison, double %ext, i32 2
8095
ret <4 x double> %ins
8196
}
8297

8398
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
84-
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
85-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
86-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
87-
; CHECK-NEXT: ret <4 x double> [[INS]]
99+
; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
100+
; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
101+
; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3
102+
; SSE-NEXT: ret <4 x double> [[INS]]
103+
;
104+
; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
105+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
106+
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
107+
; AVX-NEXT: ret <4 x double> [[INS]]
88108
;
89109
%ext = extractelement <2 x double> %b, i32 1
90110
%ins = insertelement <4 x double> poison, double %ext, i32 3
@@ -148,8 +168,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
148168

149169
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
150170
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
151-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
152-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
171+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
172+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
153173
; CHECK-NEXT: ret <2 x double> [[INS]]
154174
;
155175
%ext = extractelement <4 x double> %b, i32 1
@@ -170,15 +190,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
170190

171191
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
172192
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
173-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
174-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1
193+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
194+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
175195
; CHECK-NEXT: ret <2 x double> [[INS]]
176196
;
177197
%ext = extractelement <4 x double> %b, i32 3
178198
%ins = insertelement <2 x double> poison, double %ext, i32 1
179199
ret <2 x double> %ins
180200
}
181201

182-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
183-
; AVX: {{.*}}
184-
; SSE: {{.*}}

llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
5959
}
6060

6161
define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
62-
; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
63-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
64-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
65-
; CHECK-NEXT: ret <4 x double> [[INS]]
62+
; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
63+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
64+
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
65+
; SSE-NEXT: ret <4 x double> [[INS]]
66+
;
67+
; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
68+
; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
69+
; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1
70+
; AVX-NEXT: ret <4 x double> [[INS]]
6671
;
6772
%ext = extractelement <2 x double> %b, i32 1
6873
%ins = insertelement <4 x double> undef, double %ext, i32 1
@@ -82,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
8287

8388
define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
8489
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
85-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1
86-
; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3
90+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
91+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
8792
; CHECK-NEXT: ret <4 x double> [[INS]]
8893
;
8994
%ext = extractelement <2 x double> %b, i32 1
@@ -148,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
148153

149154
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
150155
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
151-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1
152-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
156+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
157+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
153158
; CHECK-NEXT: ret <2 x double> [[INS]]
154159
;
155160
%ext = extractelement <4 x double> %b, i32 1
@@ -170,15 +175,12 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)
170175

171176
define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
172177
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
173-
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3
174-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1
178+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
179+
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> <i32 2, i32 0>
175180
; CHECK-NEXT: ret <2 x double> [[INS]]
176181
;
177182
%ext = extractelement <4 x double> %b, i32 3
178183
%ins = insertelement <2 x double> undef, double %ext, i32 1
179184
ret <2 x double> %ins
180185
}
181186

182-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
183-
; AVX: {{.*}}
184-
; SSE: {{.*}}

0 commit comments

Comments
 (0)