Skip to content

Commit 4f8fdf7

Browse files
committed
[ISEL] Canonicalise constant splats to RHS.
SelectionDAG::getNode() canonicalises constants to the RHS if the operation is commutative, but it doesn't do so for constant splat vectors. Doing this early helps make certain folds on vector types, simplifying the code required for target DAGCombines that are enabled before Type legalization. Somewhat to my surprise, DAGCombine doesn't seem to traverse the DAG in a post-order DFS, so at the time of doing some custom fold where the input is a MUL, DAGCombiner::visitMUL hasn't yet reordered the constant splat to the RHS. This patch leads to a few improvements, but also a few minor regressions, which I traced down to D46492. When I tried reverting this change to see if the changes were still necessary, I ran into some segfaults. Not sure if there is some latent bug there. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117794
1 parent 7ccacaf commit 4f8fdf7

11 files changed

+75
-76
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5610,22 +5610,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
56105610
assert(N1.getOpcode() != ISD::DELETED_NODE &&
56115611
N2.getOpcode() != ISD::DELETED_NODE &&
56125612
"Operand is DELETED_NODE!");
5613-
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5614-
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
5615-
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
5616-
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
5617-
56185613
// Canonicalize constant to RHS if commutative.
56195614
if (TLI->isCommutativeBinOp(Opcode)) {
5620-
if (N1C && !N2C) {
5621-
std::swap(N1C, N2C);
5622-
std::swap(N1, N2);
5623-
} else if (N1CFP && !N2CFP) {
5624-
std::swap(N1CFP, N2CFP);
5615+
bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
5616+
bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
5617+
bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
5618+
bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
5619+
if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
56255620
std::swap(N1, N2);
5626-
}
56275621
}
56285622

5623+
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5624+
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
5625+
56295626
switch (Opcode) {
56305627
default: break;
56315628
case ISD::TokenFactor:

llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
126126
define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
127127
; CHECK-LABEL: in_constant_mone_vary:
128128
; CHECK: // %bb.0:
129-
; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
129+
; CHECK-NEXT: bic v0.16b, v2.16b, v1.16b
130+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
130131
; CHECK-NEXT: ret
131132
%n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
132133
%n1 = and <4 x i32> %n0, %mask
@@ -152,8 +153,9 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4
152153
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
153154
; CHECK-LABEL: in_constant_mone_vary_invmask:
154155
; CHECK: // %bb.0:
155-
; CHECK-NEXT: and v0.16b, v1.16b, v2.16b
156-
; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b
156+
; CHECK-NEXT: mvn v0.16b, v1.16b
157+
; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
158+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
157159
; CHECK-NEXT: ret
158160
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
159161
%n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x

llvm/test/CodeGen/PowerPC/combine-fneg.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
1313
; CHECK-NEXT: xvredp 2, 0
1414
; CHECK-NEXT: xxswapd 1, 1
1515
; CHECK-NEXT: xxlor 3, 1, 1
16-
; CHECK-NEXT: xvmaddadp 3, 0, 2
17-
; CHECK-NEXT: xvnmsubadp 2, 2, 3
18-
; CHECK-NEXT: xvmaddadp 1, 0, 2
19-
; CHECK-NEXT: xvmsubadp 2, 2, 1
16+
; CHECK-NEXT: xvnmsubadp 3, 0, 2
17+
; CHECK-NEXT: xvmaddadp 2, 2, 3
18+
; CHECK-NEXT: xvnmsubadp 1, 0, 2
19+
; CHECK-NEXT: xvnmaddadp 2, 2, 1
2020
; CHECK-NEXT: xvmuldp 34, 34, 2
2121
; CHECK-NEXT: xvmuldp 35, 35, 2
2222
; CHECK-NEXT: blr

llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
3636
; CHECK-NEXT: lvx 4, 0, 3
3737
; CHECK-NEXT: xxspltw 0, 0, 0
3838
; CHECK-NEXT: xvresp 1, 0
39-
; CHECK-NEXT: xvnmsubasp 35, 0, 1
39+
; CHECK-NEXT: xvmaddasp 35, 0, 1
4040
; CHECK-NEXT: xvmulsp 0, 34, 36
41-
; CHECK-NEXT: xvmaddasp 1, 1, 35
41+
; CHECK-NEXT: xvnmsubasp 1, 1, 35
4242
; CHECK-NEXT: xvmulsp 34, 0, 1
4343
; CHECK-NEXT: blr
4444
%ins = insertelement <4 x float> undef, float %a, i32 0

llvm/test/CodeGen/X86/dpbusd_const.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
6868
; AVXVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6969
; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
7070
; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
71-
; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
72-
; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
71+
; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
7372
; AVXVNNI-NEXT: vmovd %xmm1, %eax
7473
; AVXVNNI-NEXT: addl %edi, %eax
7574
; AVXVNNI-NEXT: retq
@@ -80,10 +79,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
8079
; AVX512VNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
8180
; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
8281
; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
83-
; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
84-
; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
85-
; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
86-
; AVX512VNNI-NEXT: vmovd %xmm2, %eax
82+
; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
83+
; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
84+
; AVX512VNNI-NEXT: vmovd %xmm1, %eax
8785
; AVX512VNNI-NEXT: addl %edi, %eax
8886
; AVX512VNNI-NEXT: vzeroupper
8987
; AVX512VNNI-NEXT: retq
@@ -92,10 +90,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
9290
; AVX512VLVNNI: # %bb.0: # %entry
9391
; AVX512VLVNNI-NEXT: vpmovdb %xmm0, %xmm0
9492
; AVX512VLVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
95-
; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
96-
; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
97-
; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
98-
; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax
93+
; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
94+
; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
95+
; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax
9996
; AVX512VLVNNI-NEXT: addl %edi, %eax
10097
; AVX512VLVNNI-NEXT: retq
10198
entry:

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,7 +1070,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
10701070
; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
10711071
; X64-NEXT: vandps %xmm1, %xmm0, %xmm1
10721072
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
1073-
; X64-NEXT: vorps %xmm1, %xmm2, %xmm1
1073+
; X64-NEXT: vorps %xmm2, %xmm1, %xmm1
10741074
; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0
10751075
; X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
10761076
; X64-NEXT: retq
@@ -1081,7 +1081,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
10811081
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
10821082
; X86-NEXT: vandps %xmm1, %xmm0, %xmm1
10831083
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
1084-
; X86-NEXT: vorps %xmm1, %xmm2, %xmm1
1084+
; X86-NEXT: vorps %xmm2, %xmm1, %xmm1
10851085
; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0
10861086
; X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
10871087
; X86-NEXT: vmovss %xmm0, (%esp)
@@ -1099,7 +1099,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
10991099
; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
11001100
; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
11011101
; X64-NEXT: # xmm2 = mem[0,0]
1102-
; X64-NEXT: vorpd %xmm1, %xmm2, %xmm1
1102+
; X64-NEXT: vorpd %xmm2, %xmm1, %xmm1
11031103
; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
11041104
; X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
11051105
; X64-NEXT: vzeroupper
@@ -1114,7 +1114,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
11141114
; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
11151115
; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
11161116
; X86-NEXT: # xmm2 = mem[0,0]
1117-
; X86-NEXT: vorpd %xmm1, %xmm2, %xmm1
1117+
; X86-NEXT: vorpd %xmm2, %xmm1, %xmm1
11181118
; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0
11191119
; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
11201120
; X86-NEXT: vmovsd %xmm0, (%esp)

llvm/test/CodeGen/X86/fp-round.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ define half @round_f16(half %h) {
4141
; AVX1-NEXT: callq ___extendhfsf2
4242
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
4343
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
44-
; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
44+
; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
4545
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
4646
; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
4747
; AVX1-NEXT: callq ___truncsfhf2
@@ -94,7 +94,7 @@ define float @round_f32(float %x) {
9494
; AVX1: ## %bb.0:
9595
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
9696
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
97-
; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
97+
; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
9898
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
9999
; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
100100
; AVX1-NEXT: retq
@@ -130,7 +130,7 @@ define double @round_f64(double %x) {
130130
; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
131131
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
132132
; AVX1-NEXT: ## xmm2 = mem[0,0]
133-
; AVX1-NEXT: vorpd %xmm1, %xmm2, %xmm1
133+
; AVX1-NEXT: vorpd %xmm2, %xmm1, %xmm1
134134
; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0
135135
; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
136136
; AVX1-NEXT: retq
@@ -521,11 +521,11 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
521521
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
522522
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm3
523523
; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
524-
; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3
524+
; AVX1-NEXT: vorps %ymm4, %ymm3, %ymm3
525525
; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0
526526
; AVX1-NEXT: vroundps $11, %ymm0, %ymm0
527527
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm2
528-
; AVX1-NEXT: vorps %ymm2, %ymm4, %ymm2
528+
; AVX1-NEXT: vorps %ymm4, %ymm2, %ymm2
529529
; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
530530
; AVX1-NEXT: vroundps $11, %ymm1, %ymm1
531531
; AVX1-NEXT: retq
@@ -620,11 +620,11 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
620620
; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
621621
; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm3
622622
; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
623-
; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3
623+
; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3
624624
; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0
625625
; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
626626
; AVX1-NEXT: vandpd %ymm2, %ymm1, %ymm2
627-
; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2
627+
; AVX1-NEXT: vorpd %ymm4, %ymm2, %ymm2
628628
; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
629629
; AVX1-NEXT: vroundpd $11, %ymm1, %ymm1
630630
; AVX1-NEXT: retq

llvm/test/CodeGen/X86/fp128-cast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1326,7 +1326,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
13261326
; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13271327
; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf]
13281328
; X64-AVX-NEXT: # xmm1 = mem[0,0]
1329-
; X64-AVX-NEXT: vorps %xmm0, %xmm1, %xmm0
1329+
; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
13301330
; X64-AVX-NEXT: callq __extenddftf2@PLT
13311331
; X64-AVX-NEXT: addq $8, %rsp
13321332
; X64-AVX-NEXT: .LBB26_2: # %cleanup

llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -465,9 +465,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
465465
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
466466
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
467467
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
468+
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
469+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
468470
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
469-
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
470-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
471471
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
472472
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
473473
; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -491,9 +491,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
491491
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
492492
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
493493
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
494+
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
495+
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
494496
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
495-
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
496-
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
497497
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
498498
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
499499
; X64-SSE2-NEXT: pand %xmm1, %xmm0
@@ -611,9 +611,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
611611
; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
612612
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
613613
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
614+
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
615+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
614616
; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
615-
; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
616-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
617617
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
618618
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
619619
; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -637,9 +637,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
637637
; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
638638
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
639639
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
640+
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
641+
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
640642
; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
641-
; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
642-
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
643643
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
644644
; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
645645
; X64-SSE2-NEXT: pand %xmm1, %xmm0

llvm/test/CodeGen/X86/pr43509.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
define <8 x i8> @foo(<8 x float> %arg) {
55
; CHECK-LABEL: foo:
66
; CHECK: # %bb.0: # %bb
7-
; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
8-
; CHECK-NEXT: vpmovm2b %k0, %xmm1
9-
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
10-
; CHECK-NEXT: vcmpltps %ymm2, %ymm0, %k1
7+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
8+
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k1
9+
; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
1110
; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
12-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
1311
; CHECK-NEXT: vzeroupper
1412
; CHECK-NEXT: retq
1513
bb:

0 commit comments

Comments
 (0)