Skip to content

Commit 73651ba

Browse files
authored
[x86] lowerV4I32Shuffle - don't adjust PSHUFD splat masks to match UNPCK (#161846)
Allow getV4X86ShuffleImm8ForMask to create a pure splat mask, which helps reduce the number of demanded elements.
1 parent 5537b9a commit 73651ba

13 files changed

+65
-63
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1378313783
// so prevents folding a load into this instruction or making a copy.
1378413784
const int UnpackLoMask[] = {0, 0, 1, 1};
1378513785
const int UnpackHiMask[] = {2, 2, 3, 3};
13786-
if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
13787-
Mask = UnpackLoMask;
13788-
else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
13789-
Mask = UnpackHiMask;
13786+
if (!isSingleElementRepeatedMask(Mask)) {
13787+
if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
13788+
Mask = UnpackLoMask;
13789+
else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
13790+
Mask = UnpackHiMask;
13791+
}
1379013792

1379113793
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
1379213794
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
911911
; SSE2-NEXT: paddb (%rsi), %xmm0
912912
; SSE2-NEXT: paddb 16(%rsi), %xmm1
913913
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
914-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
914+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
915915
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
916916
; SSE2-NEXT: paddb (%rdx), %xmm0
917917
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
18981898
; SSE2-NEXT: paddb (%rsi), %xmm0
18991899
; SSE2-NEXT: paddb 32(%rsi), %xmm1
19001900
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1901-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1901+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
19021902
; SSE2-NEXT: movdqa %xmm0, %xmm3
19031903
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
19041904
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
41554155
; SSE2-NEXT: paddb (%rsi), %xmm0
41564156
; SSE2-NEXT: paddb 48(%rsi), %xmm1
41574157
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
4158-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
4158+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
41594159
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
41604160
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
41614161
; SSE2-NEXT: paddb (%rdx), %xmm2

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
769769
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
770770
; SSE2: # %bb.0:
771771
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
772-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
772+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
773773
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
774774
; SSE2-NEXT: paddb (%rsi), %xmm1
775775
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15221522
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15231523
; SSE2: # %bb.0:
15241524
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
1525-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
1525+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
15261526
; SSE2-NEXT: movdqa %xmm1, %xmm2
15271527
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
15281528
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
33353335
; SSE2: # %bb.0:
33363336
; SSE2-NEXT: movdqa (%rdi), %xmm0
33373337
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3]
3338-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
3338+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
33393339
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
33403340
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
33413341
; SSE2-NEXT: paddb (%rsi), %xmm2

llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind {
5858
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
5959
; AVX2-LABEL: undef_splatmask:
6060
; AVX2: # %bb.0:
61-
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
61+
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
6262
; AVX2-NEXT: retq
6363
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
6464
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
6868
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
6969
; AVX2-LABEL: undef_splatmask2:
7070
; AVX2: # %bb.0:
71-
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
71+
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
7272
; AVX2-NEXT: retq
7373
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
7474
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
7878
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
7979
; AVX2-LABEL: undef_splatmask3:
8080
; AVX2: # %bb.0:
81-
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
81+
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
8282
; AVX2-NEXT: retq
8383
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
8484
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
8888
define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind {
8989
; AVX2-LABEL: undef_splatmask4:
9090
; AVX2: # %bb.0:
91-
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3]
91+
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2]
9292
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
9393
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
9494
; AVX2-NEXT: vmovaps %xmm1, %xmm0

llvm/test/CodeGen/X86/vec-strict-cmp-128.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
19111911
; SSE-32-NEXT: movl $0, %edx
19121912
; SSE-32-NEXT: cmoval %ecx, %edx
19131913
; SSE-32-NEXT: movd %edx, %xmm3
1914-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
1914+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
19151915
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
19161916
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
19171917
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
19181918
; SSE-32-NEXT: cmoval %ecx, %eax
19191919
; SSE-32-NEXT: movd %eax, %xmm2
1920-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
1920+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
19211921
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
19221922
; SSE-32-NEXT: pand %xmm3, %xmm0
19231923
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
20312031
; SSE-32-NEXT: movl $0, %edx
20322032
; SSE-32-NEXT: cmovael %ecx, %edx
20332033
; SSE-32-NEXT: movd %edx, %xmm3
2034-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2034+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
20352035
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
20362036
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
20372037
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
20382038
; SSE-32-NEXT: cmovael %ecx, %eax
20392039
; SSE-32-NEXT: movd %eax, %xmm2
2040-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2040+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
20412041
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
20422042
; SSE-32-NEXT: pand %xmm3, %xmm0
20432043
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
21512151
; SSE-32-NEXT: movl $0, %edx
21522152
; SSE-32-NEXT: cmoval %ecx, %edx
21532153
; SSE-32-NEXT: movd %edx, %xmm3
2154-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2154+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
21552155
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
21562156
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
21572157
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
21582158
; SSE-32-NEXT: cmoval %ecx, %eax
21592159
; SSE-32-NEXT: movd %eax, %xmm2
2160-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2160+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
21612161
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
21622162
; SSE-32-NEXT: pand %xmm3, %xmm0
21632163
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
22692269
; SSE-32-NEXT: movl $0, %edx
22702270
; SSE-32-NEXT: cmovael %ecx, %edx
22712271
; SSE-32-NEXT: movd %edx, %xmm3
2272-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2272+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
22732273
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
22742274
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
22752275
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
22762276
; SSE-32-NEXT: cmovael %ecx, %eax
22772277
; SSE-32-NEXT: movd %eax, %xmm2
2278-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2278+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
22792279
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
22802280
; SSE-32-NEXT: pand %xmm3, %xmm0
22812281
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
26802680
; SSE-32-NEXT: movl $0, %edx
26812681
; SSE-32-NEXT: cmovbl %ecx, %edx
26822682
; SSE-32-NEXT: movd %edx, %xmm3
2683-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2683+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
26842684
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
26852685
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
26862686
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
26872687
; SSE-32-NEXT: cmovbl %ecx, %eax
26882688
; SSE-32-NEXT: movd %eax, %xmm2
2689-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2689+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
26902690
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
26912691
; SSE-32-NEXT: pand %xmm3, %xmm0
26922692
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
27982798
; SSE-32-NEXT: movl $0, %edx
27992799
; SSE-32-NEXT: cmovbel %ecx, %edx
28002800
; SSE-32-NEXT: movd %edx, %xmm3
2801-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2801+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
28022802
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
28032803
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
28042804
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
28052805
; SSE-32-NEXT: cmovbel %ecx, %eax
28062806
; SSE-32-NEXT: movd %eax, %xmm2
2807-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2807+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
28082808
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
28092809
; SSE-32-NEXT: pand %xmm3, %xmm0
28102810
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
29162916
; SSE-32-NEXT: movl $0, %edx
29172917
; SSE-32-NEXT: cmovbl %ecx, %edx
29182918
; SSE-32-NEXT: movd %edx, %xmm3
2919-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
2919+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
29202920
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
29212921
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
29222922
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
29232923
; SSE-32-NEXT: cmovbl %ecx, %eax
29242924
; SSE-32-NEXT: movd %eax, %xmm2
2925-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
2925+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
29262926
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
29272927
; SSE-32-NEXT: pand %xmm3, %xmm0
29282928
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
30363036
; SSE-32-NEXT: movl $0, %edx
30373037
; SSE-32-NEXT: cmovbel %ecx, %edx
30383038
; SSE-32-NEXT: movd %edx, %xmm3
3039-
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
3039+
; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
30403040
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
30413041
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
30423042
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
30433043
; SSE-32-NEXT: cmovbel %ecx, %eax
30443044
; SSE-32-NEXT: movd %eax, %xmm2
3045-
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
3045+
; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
30463046
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
30473047
; SSE-32-NEXT: pand %xmm3, %xmm0
30483048
; SSE-32-NEXT: pandn %xmm1, %xmm3

llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
162162
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
163163
; SSE2-LABEL: splatvar_funnnel_v2i32:
164164
; SSE2: # %bb.0:
165-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
165+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
166166
; SSE2-NEXT: pslld $23, %xmm1
167167
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
168168
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
182182
;
183183
; SSE41-LABEL: splatvar_funnnel_v2i32:
184184
; SSE41: # %bb.0:
185-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
185+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
186186
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
187187
; SSE41-NEXT: pslld $23, %xmm1
188188
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
200200
;
201201
; AVX1-LABEL: splatvar_funnnel_v2i32:
202202
; AVX1: # %bb.0:
203-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
203+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
204204
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
205205
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
206206
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
277277
;
278278
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
279279
; XOPAVX1: # %bb.0:
280-
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
280+
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
281281
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
282282
; XOPAVX1-NEXT: retq
283283
;
@@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
289289
;
290290
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
291291
; X86-SSE2: # %bb.0:
292-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
292+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
293293
; X86-SSE2-NEXT: pslld $23, %xmm1
294294
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
295295
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1

llvm/test/CodeGen/X86/vector-fshl-sub128.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
250250
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
251251
; SSE2-LABEL: splatvar_funnnel_v2i32:
252252
; SSE2: # %bb.0:
253-
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
253+
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
254254
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
255255
; SSE2-NEXT: movdqa %xmm3, %xmm5
256256
; SSE2-NEXT: pandn %xmm4, %xmm5
@@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
286286
;
287287
; SSE41-LABEL: splatvar_funnnel_v2i32:
288288
; SSE41: # %bb.0:
289-
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
289+
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
290290
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
291291
; SSE41-NEXT: movdqa %xmm2, %xmm4
292292
; SSE41-NEXT: pandn %xmm3, %xmm4
@@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
316316
;
317317
; AVX1-LABEL: splatvar_funnnel_v2i32:
318318
; AVX1: # %bb.0:
319-
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
319+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
320320
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
321321
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
322322
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
423423
;
424424
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
425425
; XOPAVX1: # %bb.0:
426-
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
426+
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
427427
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
428428
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
429429
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
@@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
450450
;
451451
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
452452
; X86-SSE2: # %bb.0:
453-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
453+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
454454
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
455455
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
456456
; X86-SSE2-NEXT: pandn %xmm4, %xmm5

llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
172172
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
173173
; SSE2-LABEL: splatvar_funnnel_v2i32:
174174
; SSE2: # %bb.0:
175-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
175+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
176176
; SSE2-NEXT: pxor %xmm2, %xmm2
177177
; SSE2-NEXT: psubd %xmm1, %xmm2
178178
; SSE2-NEXT: pslld $23, %xmm2
@@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
194194
;
195195
; SSE41-LABEL: splatvar_funnnel_v2i32:
196196
; SSE41: # %bb.0:
197-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
197+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
198198
; SSE41-NEXT: pxor %xmm2, %xmm2
199199
; SSE41-NEXT: psubd %xmm1, %xmm2
200200
; SSE41-NEXT: pslld $23, %xmm2
@@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
214214
;
215215
; AVX1-LABEL: splatvar_funnnel_v2i32:
216216
; AVX1: # %bb.0:
217-
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
217+
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
218218
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
219219
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
220220
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
@@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
293293
;
294294
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
295295
; XOPAVX1: # %bb.0:
296-
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
296+
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
297297
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
298298
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
299299
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
@@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
309309
;
310310
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
311311
; X86-SSE2: # %bb.0:
312-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
312+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
313313
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
314314
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
315315
; X86-SSE2-NEXT: pslld $23, %xmm2

0 commit comments

Comments
 (0)