Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41704,6 +41704,7 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)) ||
(Op.getOpcode() == Opc && Op->hasOneUse()) ||
(Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||
(Op.getOpcode() == ISD::CONCAT_VECTORS && Op->hasOneUse()) ||
(FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
DAG.isSplatValue(Op, /*AllowUndefs*/ false);
};
Expand Down Expand Up @@ -58134,6 +58135,30 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,

unsigned Opcode = Op0.getOpcode();
switch (Opcode) {
case ISD::BITCAST: {
// TODO: Support AVX1/AVX2 bitcasts.
SmallVector<SDValue, 4> SubOps;
for (SDValue SubOp : Ops)
SubOps.push_back(peekThroughBitcasts(SubOp.getOperand(0)));
EVT InnerVT = SubOps[0].getValueType();
unsigned InnerSizeInBits = InnerVT.getScalarSizeInBits();
if (!IsSplat && InnerVT.isSimple() && InnerVT.isVector() &&
(Subtarget.hasBWI() ||
(EltSizeInBits >= 32 && InnerSizeInBits >= 32)) &&
((VT.is256BitVector() && Subtarget.hasVLX()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
llvm::all_of(SubOps, [InnerVT](SDValue Op) {
return Op.getValueType() == InnerVT;
})) {
MVT ConcatSVT = InnerVT.getScalarType().getSimpleVT();
MVT ConcatVT = MVT::getVectorVT(
ConcatSVT, VT.getSizeInBits() / ConcatSVT.getSizeInBits());
if (SDValue ConcatSrc = combineConcatVectorOps(
DL, ConcatVT, SubOps, DAG, Subtarget, Depth + 1))
return DAG.getBitcast(VT, ConcatSrc);
}
break;
}
case ISD::VECTOR_SHUFFLE: {
// TODO: Generalize NumOps support.
if (!IsSplat && NumOps == 2 &&
Expand Down
40 changes: 18 additions & 22 deletions llvm/test/CodeGen/X86/shift-i512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,20 +123,18 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
;
; AVX512VBMI-LABEL: lshr_i512_1:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512VBMI-NEXT: vpsrlq $1, %xmm1, %xmm1
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
; AVX512VBMI-NEXT: vpsrlq $1, %xmm2, %xmm2
; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
; AVX512VBMI-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
Expand Down Expand Up @@ -238,20 +236,18 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
;
; AVX512VBMI-LABEL: ashr_i512_1:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX512VBMI-NEXT: vpsraq $1, %xmm1, %xmm1
; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
; AVX512VBMI-NEXT: vpsraq $1, %xmm2, %xmm2
; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
; AVX512VBMI-NEXT: vpshldq $63, %ymm3, %ymm1, %ymm1
; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -297,23 +297,21 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
;
; AVX512BW-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,16,21]
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermt2d %zmm2, %zmm1, %zmm0
; AVX512BW-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,8,13]
; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u,33,37,41,45,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,49,53,57,62,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,4,8,13]
; AVX512BWVL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [8,12,0,5]
; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512BWVL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u]
; AVX512BWVL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,29,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BWVL-NEXT: vpermt2d %ymm2, %ymm1, %ymm0
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512BWVL-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,17,21,25,30,u,u,u,u,u,u,u,u,33,37,41,45,u,u,u,u,u,u,u,u,u,u,u,u,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512BWVL-NEXT: vpermd %zmm0, %zmm1, %zmm0
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
Expand Down
48 changes: 31 additions & 17 deletions llvm/test/CodeGen/X86/vector-fshr-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1228,13 +1228,14 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpermt2b %zmm4, %zmm3, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
Expand All @@ -1251,16 +1252,29 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatvar_funnnel_v32i8:
; AVX10: # %bb.0:
; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX10-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX10-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
; AVX10-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
; AVX10-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
; AVX10-NEXT: retq
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95,0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87]
; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLVBMI2-NEXT: retq
;
; AVX10_256-LABEL: splatvar_funnnel_v32i8:
; AVX10_256: # %bb.0:
; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
; AVX10_256-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
; XOPAVX1: # %bb.0:
Expand Down
25 changes: 13 additions & 12 deletions llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -992,25 +992,26 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm3, %zmm0
; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm3, %ymm3
; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX512VBMI2-NEXT: vpermt2b %zmm3, %zmm2, %zmm0
; AVX512VBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm2, %zmm0
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm3, %zmm0
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %ymm0, %ymm1
; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
; AVX512VLVBMI2-NEXT: vpermi2b %ymm2, %ymm1, %ymm0
; AVX512VLVBMI2-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
Expand Down
30 changes: 16 additions & 14 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,14 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
;
; AVX512BW-FCP-LABEL: store_i16_stride4_vf2:
; AVX512BW-FCP: # %bb.0:
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
; AVX512BW-FCP-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
; AVX512BW-FCP-NEXT: vmovdqa %xmm2, (%r8)
; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm0
; AVX512BW-FCP-NEXT: vmovdqa (%rcx), %xmm1
; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [8,24,0,16,9,25,1,17]
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm1, %ymm1
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0
; AVX512BW-FCP-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512BW-FCP-NEXT: vmovdqa %xmm0, (%r8)
; AVX512BW-FCP-NEXT: vzeroupper
; AVX512BW-FCP-NEXT: retq
;
; AVX512DQ-BW-LABEL: store_i16_stride4_vf2:
Expand All @@ -152,13 +153,14 @@ define void @store_i16_stride4_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
;
; AVX512DQ-BW-FCP-LABEL: store_i16_stride4_vf2:
; AVX512DQ-BW-FCP: # %bb.0:
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm1
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [0,2,8,10,1,3,9,11]
; AVX512DQ-BW-FCP-NEXT: vpermi2w %xmm1, %xmm0, %xmm2
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm2, (%r8)
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm0
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rcx), %xmm1
; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} xmm2 = [8,24,0,16,9,25,1,17]
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm1, %ymm1
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0
; AVX512DQ-BW-FCP-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm0, (%r8)
; AVX512DQ-BW-FCP-NEXT: vzeroupper
; AVX512DQ-BW-FCP-NEXT: retq
%in.vec0 = load <2 x i16>, ptr %in.vecptr0, align 64
%in.vec1 = load <2 x i16>, ptr %in.vecptr1, align 64
Expand Down
Loading