Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13776,6 +13776,14 @@ static SDValue lowerV8I16GeneralSingleInputShuffle(
DWordPairs.resize(2, std::make_pair(-1, -1));
int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
DWordPairs[1].first, DWordPairs[1].second};
// For splat, ensure we widen the PSHUFDMask to allow vXi64 folds.
if (ShuffleVectorSDNode::isSplatMask(PSHUFDMask) &&
ShuffleVectorSDNode::isSplatMask(PSHUFHalfMask)) {
int SplatIdx = ShuffleVectorSDNode::getSplatMaskIndex(PSHUFHalfMask);
std::fill(PSHUFHalfMask, PSHUFHalfMask + 4, SplatIdx);
PSHUFDMask[0] = PSHUFDMask[2] = DOffset + 0;
PSHUFDMask[1] = PSHUFDMask[3] = DOffset + 1;
}
if ((NumHToL + NumHToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
if ((NumLToL + NumLToH) == 0)
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2363,7 +2363,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa 16(%rdx), %xmm2
; SSE2-NEXT: paddb %xmm1, %xmm2
Expand All @@ -2384,7 +2384,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE42-NEXT: paddb (%rdx), %xmm0
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
; SSE42-NEXT: paddb %xmm1, %xmm2
Expand All @@ -2405,7 +2405,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
Expand Down Expand Up @@ -2497,7 +2497,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: paddb (%rdx), %xmm2
Expand Down Expand Up @@ -2762,7 +2762,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: paddb (%rdx), %xmm2
Expand All @@ -2783,7 +2783,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE42-NEXT: paddb (%rdx), %xmm0
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
; SSE42-NEXT: paddb %xmm1, %xmm2
Expand All @@ -2802,7 +2802,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
; AVX-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
Expand Down Expand Up @@ -3549,7 +3549,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,0,65535]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: paddb (%rdx), %xmm2
Expand All @@ -3568,7 +3568,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; SSE42-NEXT: paddb 48(%rsi), %xmm1
; SSE42-NEXT: paddb (%rsi), %xmm0
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; SSE42-NEXT: paddb (%rdx), %xmm1
; SSE42-NEXT: movdqa 16(%rdx), %xmm2
Expand All @@ -3586,7 +3586,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
Expand Down Expand Up @@ -4963,7 +4963,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: paddb (%rsi), %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: movdqa 16(%rdx), %xmm1
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: movdqa (%rdx), %xmm2
Expand All @@ -4982,7 +4982,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb 48(%rdx), %xmm0, %xmm1
; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1837,7 +1837,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
; SSE2-NEXT: paddb %xmm1, %xmm2
Expand All @@ -1856,7 +1856,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE42-NEXT: paddb (%rsi), %xmm0
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
; SSE42-NEXT: paddb %xmm1, %xmm2
Expand All @@ -1875,9 +1875,9 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
Expand Down Expand Up @@ -1954,7 +1954,7 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: pandn %xmm0, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: paddb (%rsi), %xmm1
Expand Down Expand Up @@ -2174,7 +2174,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: paddb (%rsi), %xmm0
Expand All @@ -2192,7 +2192,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; SSE42-NEXT: palignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE42-NEXT: movdqa 16(%rsi), %xmm2
; SSE42-NEXT: paddb %xmm1, %xmm2
; SSE42-NEXT: paddb (%rsi), %xmm0
Expand All @@ -2208,9 +2208,9 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
; AVX-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
Expand Down Expand Up @@ -2835,7 +2835,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; SSE2-NEXT: movdqa 48(%rdi), %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = mem[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: paddb (%rsi), %xmm0
Expand All @@ -2850,7 +2850,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; SSE42-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
; SSE42: # %bb.0:
; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE42-NEXT: movdqa 48(%rdi), %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
; SSE42-NEXT: paddb (%rsi), %xmm1
Expand All @@ -2865,7 +2865,7 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
; AVX-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
; AVX: # %bb.0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
; AVX-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
Expand Down Expand Up @@ -3913,7 +3913,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; SSE-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
; SSE: # %bb.0:
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: movdqa 16(%rsi), %xmm1
; SSE-NEXT: paddb %xmm0, %xmm1
; SSE-NEXT: movdqa (%rsi), %xmm2
Expand All @@ -3930,7 +3930,7 @@ define void @vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32(ptr %in
; AVX-LABEL: vec512_i8_widen_to_i16_factor2_broadcast_to_v32i16_factor32:
; AVX: # %bb.0:
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX-NEXT: vpaddb 48(%rsi), %xmm0, %xmm1
; AVX-NEXT: vpaddb 32(%rsi), %xmm0, %xmm2
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1899,15 +1899,15 @@ define <4 x i64> @test_mm256_set1_epi16(i16 %a0) nounwind {
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd %eax, %xmm0
; X86-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm256_set1_epi16:
; X64: # %bb.0:
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%res0 = insertelement <16 x i16> undef, i16 %a0, i32 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
Expand All @@ -107,7 +107,7 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -286,7 +286,7 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
Expand All @@ -300,7 +300,7 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
Expand Down Expand Up @@ -519,7 +519,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
Expand All @@ -528,7 +528,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
Expand All @@ -540,7 +540,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
Expand All @@ -550,7 +550,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
Expand Down
Loading
Loading