Skip to content

Commit 4ece4cd

Browse files
committed
[ARM] Fold away CMP/CSINC from CMOV
This makes use of the code in D114013 to fold away unnecessary CMPZ/CSINC starting from a CMOV, in a similar way to how we fold away CSINV/CSINC/etc. Differential Revision: https://reviews.llvm.org/D115185
1 parent 555eacf commit 4ece4cd

File tree

5 files changed

+43
-44
lines changed

5 files changed

+43
-44
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18027,6 +18027,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
1802718027
if (!VT.isInteger())
1802818028
return SDValue();
1802918029

18030+
// Fold away an unnecessary CMPZ/CMOV
18031+
// CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18032+
// if C1==EQ -> CMOV A, B, C2, $cpsr, D
18033+
// if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18034+
if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18035+
N->getConstantOperandVal(2) == ARMCC::NE) {
18036+
ARMCC::CondCodes Cond;
18037+
if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18038+
if (N->getConstantOperandVal(2) == ARMCC::NE)
18039+
Cond = ARMCC::getOppositeCondition(Cond);
18040+
return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18041+
N->getOperand(1),
18042+
DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18043+
N->getOperand(3), C);
18044+
}
18045+
}
18046+
1803018047
// Materialize a boolean comparison for integers so we can avoid branching.
1803118048
if (isNullConstant(FalseVal)) {
1803218049
if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {

llvm/test/CodeGen/Thumb2/mve-pred-or.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -380,24 +380,22 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
380380
; CHECK-NEXT: vmov r0, r1, d0
381381
; CHECK-NEXT: orrs r0, r1
382382
; CHECK-NEXT: vmov r1, r2, d2
383-
; CHECK-NEXT: cset r0, eq
384383
; CHECK-NEXT: orrs r1, r2
385384
; CHECK-NEXT: cset r1, eq
386385
; CHECK-NEXT: cmp r0, #0
387-
; CHECK-NEXT: it ne
388-
; CHECK-NEXT: movne r1, #1
386+
; CHECK-NEXT: it eq
387+
; CHECK-NEXT: moveq r1, #1
389388
; CHECK-NEXT: rsbs r0, r1, #0
390389
; CHECK-NEXT: movs r1, #0
391390
; CHECK-NEXT: bfi r1, r0, #0, #8
392391
; CHECK-NEXT: vmov r0, r2, d1
393392
; CHECK-NEXT: orrs r0, r2
394393
; CHECK-NEXT: vmov r2, r3, d3
395-
; CHECK-NEXT: cset r0, eq
396394
; CHECK-NEXT: orrs r2, r3
397395
; CHECK-NEXT: cset r2, eq
398396
; CHECK-NEXT: cmp r0, #0
399-
; CHECK-NEXT: it ne
400-
; CHECK-NEXT: movne r2, #1
397+
; CHECK-NEXT: it eq
398+
; CHECK-NEXT: moveq r2, #1
401399
; CHECK-NEXT: rsbs r0, r2, #0
402400
; CHECK-NEXT: bfi r1, r0, #8, #8
403401
; CHECK-NEXT: vmsr p0, r1
@@ -422,25 +420,21 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
422420
; CHECK-NEXT: vmov r1, r2, d0
423421
; CHECK-NEXT: cset r0, eq
424422
; CHECK-NEXT: orrs r1, r2
425-
; CHECK-NEXT: vmov r12, r2, d5
426-
; CHECK-NEXT: cset r1, eq
427-
; CHECK-NEXT: cmp r1, #0
428-
; CHECK-NEXT: it ne
429-
; CHECK-NEXT: movne r0, #1
423+
; CHECK-NEXT: it eq
424+
; CHECK-NEXT: moveq r0, #1
430425
; CHECK-NEXT: rsbs r0, r0, #0
431426
; CHECK-NEXT: movs r1, #0
432427
; CHECK-NEXT: bfi r1, r0, #0, #8
428+
; CHECK-NEXT: vmov r12, r2, d5
433429
; CHECK-NEXT: vmov r3, r0, d3
434430
; CHECK-NEXT: eors r0, r2
435431
; CHECK-NEXT: eor.w r2, r3, r12
436432
; CHECK-NEXT: orrs r0, r2
437433
; CHECK-NEXT: vmov r2, r3, d1
438434
; CHECK-NEXT: cset r0, eq
439435
; CHECK-NEXT: orrs r2, r3
440-
; CHECK-NEXT: cset r2, eq
441-
; CHECK-NEXT: cmp r2, #0
442-
; CHECK-NEXT: it ne
443-
; CHECK-NEXT: movne r0, #1
436+
; CHECK-NEXT: it eq
437+
; CHECK-NEXT: moveq r0, #1
444438
; CHECK-NEXT: rsbs r0, r0, #0
445439
; CHECK-NEXT: bfi r1, r0, #8, #8
446440
; CHECK-NEXT: vmsr p0, r1

llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,26 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i
7373
; CHECK-NEXT: vmov r0, r1, d4
7474
; CHECK-NEXT: orrs r0, r1
7575
; CHECK-NEXT: vmov r1, r2, d0
76-
; CHECK-NEXT: cset r0, eq
7776
; CHECK-NEXT: orrs r1, r2
7877
; CHECK-NEXT: vmov r2, r3, d2
7978
; CHECK-NEXT: cset r1, eq
8079
; CHECK-NEXT: orrs r2, r3
8180
; CHECK-NEXT: cset r2, eq
8281
; CHECK-NEXT: cmp r0, #0
83-
; CHECK-NEXT: csel r0, r1, r2, ne
82+
; CHECK-NEXT: csel r0, r1, r2, eq
8483
; CHECK-NEXT: movs r1, #0
8584
; CHECK-NEXT: rsbs r0, r0, #0
8685
; CHECK-NEXT: bfi r1, r0, #0, #8
8786
; CHECK-NEXT: vmov r0, r2, d5
88-
; CHECK-NEXT: orrs r0, r2
87+
; CHECK-NEXT: orr.w r12, r0, r2
8988
; CHECK-NEXT: vmov r2, r3, d1
90-
; CHECK-NEXT: cset r12, eq
9189
; CHECK-NEXT: orrs r2, r3
9290
; CHECK-NEXT: vmov r3, r0, d3
9391
; CHECK-NEXT: cset r2, eq
9492
; CHECK-NEXT: orrs r0, r3
9593
; CHECK-NEXT: cset r0, eq
9694
; CHECK-NEXT: cmp.w r12, #0
97-
; CHECK-NEXT: csel r0, r2, r0, ne
95+
; CHECK-NEXT: csel r0, r2, r0, eq
9896
; CHECK-NEXT: rsbs r0, r0, #0
9997
; CHECK-NEXT: bfi r1, r0, #8, #8
10098
; CHECK-NEXT: vmsr p0, r1

llvm/test/CodeGen/Thumb2/mve-pred-xor.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -460,24 +460,22 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) {
460460
; CHECK-NEXT: vmov r0, r1, d0
461461
; CHECK-NEXT: orrs r0, r1
462462
; CHECK-NEXT: vmov r1, r2, d2
463-
; CHECK-NEXT: cset r0, eq
464463
; CHECK-NEXT: orrs r1, r2
465464
; CHECK-NEXT: cset r1, eq
466465
; CHECK-NEXT: cmp r0, #0
467-
; CHECK-NEXT: it ne
468-
; CHECK-NEXT: eorne r1, r1, #1
466+
; CHECK-NEXT: it eq
467+
; CHECK-NEXT: eoreq r1, r1, #1
469468
; CHECK-NEXT: rsbs r0, r1, #0
470469
; CHECK-NEXT: movs r1, #0
471470
; CHECK-NEXT: bfi r1, r0, #0, #8
472471
; CHECK-NEXT: vmov r0, r2, d1
473472
; CHECK-NEXT: orrs r0, r2
474473
; CHECK-NEXT: vmov r2, r3, d3
475-
; CHECK-NEXT: cset r0, eq
476474
; CHECK-NEXT: orrs r2, r3
477475
; CHECK-NEXT: cset r2, eq
478476
; CHECK-NEXT: cmp r0, #0
479-
; CHECK-NEXT: it ne
480-
; CHECK-NEXT: eorne r2, r2, #1
477+
; CHECK-NEXT: it eq
478+
; CHECK-NEXT: eoreq r2, r2, #1
481479
; CHECK-NEXT: rsbs r0, r2, #0
482480
; CHECK-NEXT: bfi r1, r0, #8, #8
483481
; CHECK-NEXT: vmsr p0, r1
@@ -502,25 +500,21 @@ define arm_aapcs_vfpcc <2 x i64> @cmpeq_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i6
502500
; CHECK-NEXT: vmov r1, r2, d0
503501
; CHECK-NEXT: cset r0, eq
504502
; CHECK-NEXT: orrs r1, r2
505-
; CHECK-NEXT: vmov r12, r2, d5
506-
; CHECK-NEXT: cset r1, eq
507-
; CHECK-NEXT: cmp r1, #0
508-
; CHECK-NEXT: it ne
509-
; CHECK-NEXT: eorne r0, r0, #1
503+
; CHECK-NEXT: it eq
504+
; CHECK-NEXT: eoreq r0, r0, #1
510505
; CHECK-NEXT: rsbs r0, r0, #0
511506
; CHECK-NEXT: movs r1, #0
512507
; CHECK-NEXT: bfi r1, r0, #0, #8
508+
; CHECK-NEXT: vmov r12, r2, d5
513509
; CHECK-NEXT: vmov r3, r0, d3
514510
; CHECK-NEXT: eors r0, r2
515511
; CHECK-NEXT: eor.w r2, r3, r12
516512
; CHECK-NEXT: orrs r0, r2
517513
; CHECK-NEXT: vmov r2, r3, d1
518514
; CHECK-NEXT: cset r0, eq
519515
; CHECK-NEXT: orrs r2, r3
520-
; CHECK-NEXT: cset r2, eq
521-
; CHECK-NEXT: cmp r2, #0
522-
; CHECK-NEXT: it ne
523-
; CHECK-NEXT: eorne r0, r0, #1
516+
; CHECK-NEXT: it eq
517+
; CHECK-NEXT: eoreq r0, r0, #1
524518
; CHECK-NEXT: rsbs r0, r0, #0
525519
; CHECK-NEXT: bfi r1, r0, #8, #8
526520
; CHECK-NEXT: vmsr p0, r1

llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,10 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
4747
; CHECK-NEXT: cmp r1, #0
4848
; CHECK-NEXT: cset r1, ne
4949
; CHECK-NEXT: cmp r2, #0
50-
; CHECK-NEXT: cset r2, mi
51-
; CHECK-NEXT: cmp r2, #0
52-
; CHECK-NEXT: it ne
53-
; CHECK-NEXT: eorne r1, r1, #1
54-
; CHECK-NEXT: rsbs r1, r1, #0
50+
; CHECK-NEXT: it mi
51+
; CHECK-NEXT: eormi r1, r1, #1
5552
; CHECK-NEXT: movs r2, #0
53+
; CHECK-NEXT: rsbs r1, r1, #0
5654
; CHECK-NEXT: bfi r2, r1, #0, #8
5755
; CHECK-NEXT: vmov r1, r3, d3
5856
; CHECK-NEXT: adds r1, r1, r0
@@ -66,10 +64,8 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2)
6664
; CHECK-NEXT: vmov q0[3], q0[1], lr, r5
6765
; CHECK-NEXT: cset r0, ne
6866
; CHECK-NEXT: cmp r3, #0
69-
; CHECK-NEXT: cset r3, mi
70-
; CHECK-NEXT: cmp r3, #0
71-
; CHECK-NEXT: it ne
72-
; CHECK-NEXT: eorne r0, r0, #1
67+
; CHECK-NEXT: it mi
68+
; CHECK-NEXT: eormi r0, r0, #1
7369
; CHECK-NEXT: rsbs r0, r0, #0
7470
; CHECK-NEXT: bfi r2, r0, #8, #8
7571
; CHECK-NEXT: asrs r0, r5, #31

0 commit comments

Comments
 (0)