Skip to content

Commit 6027063

Browse files
committed
Fix invalid argument in getCmpSelInstrCost
The vector type for the original SelectInst is SrcVecTy. It was a mistake to pass DstVecTy as the argument when computing OldCost; this is now fixed.
1 parent eb61e31 commit 6027063

File tree

3 files changed

+50
-105
lines changed

3 files changed

+50
-105
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2055,8 +2055,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
20552055
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
20562056
auto SelOp = Instruction::Select;
20572057
InstructionCost OldCost = TTI.getCmpSelInstrCost(
2058-
SelOp, DstVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
2059-
OldCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C2VecTy,
2058+
SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
2059+
OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy,
20602060
CmpInst::BAD_ICMP_PREDICATE, CostKind);
20612061
OldCost += TTI.getShuffleCost(SK, SrcVecTy, Mask, CostKind, 0, nullptr,
20622062
{I.getOperand(0), I.getOperand(1)}, &I);

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 33 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,11 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
117117
; SSE-LABEL: @x86_pblendvb_v8i32_v4i32(
118118
; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
119119
; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
120-
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
120+
; SSE-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
121+
; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
121122
; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
122-
; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
123-
; SSE-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
124-
; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP4]] to <4 x i64>
123+
; SSE-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
124+
; SSE-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
125125
; SSE-NEXT: ret <4 x i64> [[RES]]
126126
;
127127
; AVX2-LABEL: @x86_pblendvb_v8i32_v4i32(
@@ -168,11 +168,11 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
168168
; SSE-LABEL: @x86_pblendvb_v16i16_v8i16(
169169
; SSE-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
170170
; SSE-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
171-
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
171+
; SSE-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
172+
; SSE-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
172173
; SSE-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
173-
; SSE-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
174-
; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
175-
; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP4]] to <4 x i64>
174+
; SSE-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
175+
; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
176176
; SSE-NEXT: ret <4 x i64> [[RES]]
177177
;
178178
; AVX2-LABEL: @x86_pblendvb_v16i16_v8i16(
@@ -270,20 +270,10 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>
270270
;
271271

272272
define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
273-
; SSE-LABEL: @x86_pblendvb_v8f64_v4f64(
274-
; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
275-
; SSE-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
276-
; SSE-NEXT: ret <8 x double> [[DOTV]]
277-
;
278-
; AVX2-LABEL: @x86_pblendvb_v8f64_v4f64(
279-
; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
280-
; AVX2-NEXT: [[DOTV:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
281-
; AVX2-NEXT: ret <8 x double> [[DOTV]]
282-
;
283-
; AVX512-LABEL: @x86_pblendvb_v8f64_v4f64(
284-
; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
285-
; AVX512-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
286-
; AVX512-NEXT: ret <8 x double> [[DOTV]]
273+
; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64(
274+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
275+
; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
276+
; CHECK-NEXT: ret <8 x double> [[DOTV]]
287277
;
288278
%a.bc = bitcast <8 x double> %a to <64 x i8>
289279
%b.bc = bitcast <8 x double> %b to <64 x i8>
@@ -304,20 +294,10 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b,
304294
}
305295

306296
define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
307-
; SSE-LABEL: @x86_pblendvb_v16f32_v8f32(
308-
; SSE-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
309-
; SSE-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
310-
; SSE-NEXT: ret <16 x float> [[DOTV]]
311-
;
312-
; AVX2-LABEL: @x86_pblendvb_v16f32_v8f32(
313-
; AVX2-NEXT: [[TMP1:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
314-
; AVX2-NEXT: [[DOTV:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
315-
; AVX2-NEXT: ret <16 x float> [[DOTV]]
316-
;
317-
; AVX512-LABEL: @x86_pblendvb_v16f32_v8f32(
318-
; AVX512-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
319-
; AVX512-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
320-
; AVX512-NEXT: ret <16 x float> [[DOTV]]
297+
; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32(
298+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
299+
; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
300+
; CHECK-NEXT: ret <16 x float> [[DOTV]]
321301
;
322302
%a.bc = bitcast <16 x float> %a to <64 x i8>
323303
%b.bc = bitcast <16 x float> %b to <64 x i8>
@@ -338,20 +318,10 @@ define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b,
338318
}
339319

340320
define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
341-
; SSE-LABEL: @x86_pblendvb_v8i64_v4i64(
342-
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
343-
; SSE-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
344-
; SSE-NEXT: ret <8 x i64> [[TMP2]]
345-
;
346-
; AVX2-LABEL: @x86_pblendvb_v8i64_v4i64(
347-
; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
348-
; AVX2-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
349-
; AVX2-NEXT: ret <8 x i64> [[TMP2]]
350-
;
351-
; AVX512-LABEL: @x86_pblendvb_v8i64_v4i64(
352-
; AVX512-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
353-
; AVX512-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
354-
; AVX512-NEXT: ret <8 x i64> [[TMP1]]
321+
; CHECK-LABEL: @x86_pblendvb_v8i64_v4i64(
322+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i64> [[C:%.*]], [[D:%.*]]
323+
; CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[CMP]], <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]]
324+
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
355325
;
356326
%a.bc = bitcast <8 x i64> %a to <64 x i8>
357327
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -372,35 +342,15 @@ define <8 x i64> @x86_pblendvb_v8i64_v4i64(<8 x i64> %a, <8 x i64> %b, <8 x i64>
372342
}
373343

374344
define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
375-
; SSE-LABEL: @x86_pblendvb_v16i32_v8i32(
376-
; SSE-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
377-
; SSE-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
378-
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
379-
; SSE-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
380-
; SSE-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
381-
; SSE-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
382-
; SSE-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
383-
; SSE-NEXT: ret <8 x i64> [[RES]]
384-
;
385-
; AVX2-LABEL: @x86_pblendvb_v16i32_v8i32(
386-
; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
387-
; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
388-
; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
389-
; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
390-
; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
391-
; AVX2-NEXT: [[TMP4:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
392-
; AVX2-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP4]] to <8 x i64>
393-
; AVX2-NEXT: ret <8 x i64> [[RES]]
394-
;
395-
; AVX512-LABEL: @x86_pblendvb_v16i32_v8i32(
396-
; AVX512-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
397-
; AVX512-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
398-
; AVX512-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
399-
; AVX512-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
400-
; AVX512-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
401-
; AVX512-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
402-
; AVX512-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
403-
; AVX512-NEXT: ret <8 x i64> [[RES]]
345+
; CHECK-LABEL: @x86_pblendvb_v16i32_v8i32(
346+
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
347+
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
348+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
349+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
350+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
351+
; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
352+
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
353+
; CHECK-NEXT: ret <8 x i64> [[RES]]
404354
;
405355
%a.bc = bitcast <8 x i64> %a to <64 x i8>
406356
%b.bc = bitcast <8 x i64> %b to <64 x i8>
@@ -436,11 +386,11 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
436386
; AVX2-LABEL: @x86_pblendvb_v32i16_v16i16(
437387
; AVX2-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
438388
; AVX2-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
439-
; AVX2-NEXT: [[TMP1:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
389+
; AVX2-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
390+
; AVX2-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
440391
; AVX2-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
441-
; AVX2-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
442-
; AVX2-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[TMP2]], <32 x i16> [[TMP3]]
443-
; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP4]] to <8 x i64>
392+
; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
393+
; AVX2-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
444394
; AVX2-NEXT: ret <8 x i64> [[RES]]
445395
;
446396
; AVX512-LABEL: @x86_pblendvb_v32i16_v16i16(

llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,9 @@ define <8 x i16> @src_v8tov8_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i1
6464
define <16 x i16> @src_v8tov16_i16(<8 x i1> %a, <8 x i1> %b, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
6565
; SSE-LABEL: define <16 x i16> @src_v8tov16_i16(
6666
; SSE-SAME: <8 x i1> [[A:%.*]], <8 x i1> [[B:%.*]], <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]], <8 x i16> [[Z:%.*]]) #[[ATTR0]] {
67-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[A]], <8 x i1> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
68-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X]], <8 x i16> [[Y]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
69-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[Z]], <8 x i16> [[X]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
70-
; SSE-NEXT: [[RES:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP2]], <16 x i16> [[TMP3]]
67+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <8 x i1> [[A]], <8 x i16> [[X]], <8 x i16> [[Z]]
68+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <8 x i1> [[B]], <8 x i16> [[Y]], <8 x i16> [[X]]
69+
; SSE-NEXT: [[RES:%.*]] = shufflevector <8 x i16> [[SELECT_XZ]], <8 x i16> [[SELECT_YX]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7170
; SSE-NEXT: ret <16 x i16> [[RES]]
7271
;
7372
; AVX2-LABEL: define <16 x i16> @src_v8tov16_i16(
@@ -108,10 +107,9 @@ define <4 x i32> @src_v2tov4_i32(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i3
108107
define <8 x i32> @src_v4tov8_i32(<4 x i1> %a, <4 x i1> %b, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
109108
; SSE-LABEL: define <8 x i32> @src_v4tov8_i32(
110109
; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
111-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
112-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
113-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
114-
; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
110+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x i32> [[X]], <4 x i32> [[Z]]
111+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x i32> [[Y]], <4 x i32> [[X]]
112+
; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[SELECT_XZ]], <4 x i32> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
115113
; SSE-NEXT: ret <8 x i32> [[RES]]
116114
;
117115
; AVX2-LABEL: define <8 x i32> @src_v4tov8_i32(
@@ -168,10 +166,9 @@ define <2 x i64> @src_v2tov2_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i6
168166
define <4 x i64> @src_v2tov4_i64(<2 x i1> %a, <2 x i1> %b, <2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
169167
; SSE-LABEL: define <4 x i64> @src_v2tov4_i64(
170168
; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
171-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
172-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
173-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[Z]], <2 x i64> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
174-
; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
169+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i64> [[X]], <2 x i64> [[Z]]
170+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i64> [[Y]], <2 x i64> [[X]]
171+
; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[SELECT_XZ]], <2 x i64> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
175172
; SSE-NEXT: ret <4 x i64> [[RES]]
176173
;
177174
; AVX2-LABEL: define <4 x i64> @src_v2tov4_i64(
@@ -228,10 +225,9 @@ define <4 x float> @src_v2tov4_float(<2 x i1> %a, <2 x i1> %b, <2 x float> %x, <
228225
define <8 x float> @src_v4tov8_float(<4 x i1> %a, <4 x i1> %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
229226
; SSE-LABEL: define <8 x float> @src_v4tov8_float(
230227
; SSE-SAME: <4 x i1> [[A:%.*]], <4 x i1> [[B:%.*]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
231-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A]], <4 x i1> [[B]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
232-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
233-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Z]], <4 x float> [[X]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
234-
; SSE-NEXT: [[RES:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]]
228+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <4 x i1> [[A]], <4 x float> [[X]], <4 x float> [[Z]]
229+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <4 x i1> [[B]], <4 x float> [[Y]], <4 x float> [[X]]
230+
; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[SELECT_XZ]], <4 x float> [[SELECT_YX]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
235231
; SSE-NEXT: ret <8 x float> [[RES]]
236232
;
237233
; AVX2-LABEL: define <8 x float> @src_v4tov8_float(
@@ -288,10 +284,9 @@ define <2 x double> @src_v2tov2_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x
288284
define <4 x double> @src_v2tov4_double(<2 x i1> %a, <2 x i1> %b, <2 x double> %x, <2 x double> %y, <2 x double> %z) {
289285
; SSE-LABEL: define <4 x double> @src_v2tov4_double(
290286
; SSE-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]], <2 x double> [[Z:%.*]]) #[[ATTR0]] {
291-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[A]], <2 x i1> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
292-
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293-
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[Z]], <2 x double> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
294-
; SSE-NEXT: [[RES:%.*]] = select <4 x i1> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]]
287+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x double> [[X]], <2 x double> [[Z]]
288+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x double> [[Y]], <2 x double> [[X]]
289+
; SSE-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[SELECT_XZ]], <2 x double> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
295290
; SSE-NEXT: ret <4 x double> [[RES]]
296291
;
297292
; AVX2-LABEL: define <4 x double> @src_v2tov4_double(

0 commit comments

Comments
 (0)