Skip to content

Commit 2476e2a

Browse files
committed
[M68k] Optimize for overflow arithmetics that will never overflow
We lower overflow arithmetic operations to their M68kISD counterparts, which produce results of {i16/i32, i8} in which the second result represents CCR. In cases where we're certain there won't be an overflow, for instance 8- and 16-bit multiplications, we simply use zero in place of the second result. This patch replaces an M68kISD::CMOV that takes this kind of zero or all-ones CCR as its condition value with the corresponding operand value.
1 parent 6f85075 commit 2476e2a

File tree

3 files changed

+56
-39
lines changed

3 files changed

+56
-39
lines changed

llvm/lib/Target/M68k/M68kISelLowering.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1637,7 +1637,7 @@ SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
16371637
if (isa<ConstantSDNode>(CCR)) {
16381638
// It's likely a result of operations that will not overflow
16391639
// hence no setcc is needed.
1640-
Overflow = DAG.getZExtOrTrunc(CCR, DL, N->getValueType(1));
1640+
Overflow = CCR;
16411641
} else {
16421642
// Generate a M68kISD::SETCC.
16431643
Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
@@ -2406,6 +2406,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
24062406
}
24072407
}
24082408

2409+
// Simple optimization when Cond is a constant to avoid generating
2410+
// M68kISD::CMOV if possible.
2411+
// TODO: Generalize this to use SelectionDAG::computeKnownBits.
2412+
if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) {
2413+
const APInt &C = Const->getAPIntValue();
2414+
if (C.countr_zero() >= 5)
2415+
return Op2;
2416+
else if (C.countr_one() >= 5)
2417+
return Op1;
2418+
}
2419+
24092420
// M68kISD::CMOV means set the result (which is operand 1) to the RHS if
24102421
// condition is true.
24112422
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);

llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,28 @@
44
define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
55
; CHECK-LABEL: smul_i8:
66
; CHECK: ; %bb.0: ; %entry
7-
; CHECK-NEXT: move.b (11,%sp), %d1
8-
; CHECK-NEXT: and.l #255, %d1
9-
; CHECK-NEXT: move.b (7,%sp), %d0
7+
; CHECK-NEXT: move.b (11,%sp), %d0
108
; CHECK-NEXT: and.l #255, %d0
11-
; CHECK-NEXT: muls %d1, %d0
12-
; CHECK-NEXT: move.b #0, %d1
13-
; CHECK-NEXT: move.w %d1, %ccr
14-
; CHECK-NEXT: bvs .LBB0_2
15-
; CHECK-NEXT: ; %bb.1: ; %entry
16-
; CHECK-NEXT: move.b #42, %d0
17-
; CHECK-NEXT: .LBB0_2: ; %entry
9+
; CHECK-NEXT: move.b (7,%sp), %d1
10+
; CHECK-NEXT: and.l #255, %d1
11+
; CHECK-NEXT: muls %d0, %d1
12+
; CHECK-NEXT: move.l %d1, %d0
13+
; CHECK-NEXT: and.l #65535, %d0
1814
; CHECK-NEXT: and.l #255, %d0
1915
; CHECK-NEXT: rts
16+
entry:
17+
%smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
18+
%cmp = extractvalue { i8, i1 } %smul, 1
19+
%smul.result = extractvalue { i8, i1 } %smul, 0
20+
%X = select i1 %cmp, i8 42, i8 %smul.result
21+
ret i8 %X
22+
}
23+
24+
define zeroext i8 @smul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
25+
; CHECK-LABEL: smul_i8_no_ovf:
26+
; CHECK: ; %bb.0: ; %entry
27+
; CHECK-NEXT: move.l #42, %d0
28+
; CHECK-NEXT: rts
2029
entry:
2130
%smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
2231
%cmp = extractvalue { i8, i1 } %smul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
3342
; CHECK-NEXT: move.w (6,%sp), %d0
3443
; CHECK-NEXT: move.w (10,%sp), %d1
3544
; CHECK-NEXT: muls %d1, %d0
36-
; CHECK-NEXT: move.b #0, %d1
37-
; CHECK-NEXT: move.w %d1, %ccr
38-
; CHECK-NEXT: bvs .LBB1_2
39-
; CHECK-NEXT: ; %bb.1: ; %entry
40-
; CHECK-NEXT: move.w #42, %d0
41-
; CHECK-NEXT: .LBB1_2: ; %entry
4245
; CHECK-NEXT: and.l #65535, %d0
4346
; CHECK-NEXT: rts
4447
entry:
4548
%smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
4649
%cmp = extractvalue { i16, i1 } %smul, 1
4750
%smul.result = extractvalue { i16, i1 } %smul, 0
48-
%X = select i1 %cmp, i16 %smul.result, i16 42
51+
%X = select i1 %cmp, i16 42, i16 %smul.result
4952
ret i16 %X
5053
}
5154

@@ -62,15 +65,15 @@ define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
6265
; CHECK: ; %bb.0: ; %entry
6366
; CHECK-NEXT: suba.l #12, %sp
6467
; CHECK-NEXT: muls.l %d1, %d0
65-
; CHECK-NEXT: bvc .LBB2_1
68+
; CHECK-NEXT: bvc .LBB3_1
6669
; CHECK-NEXT: ; %bb.2: ; %overflow
6770
; CHECK-NEXT: lea (no,%pc), %a0
6871
; CHECK-NEXT: move.l %a0, (%sp)
6972
; CHECK-NEXT: jsr printf@PLT
7073
; CHECK-NEXT: move.b #0, %d0
7174
; CHECK-NEXT: adda.l #12, %sp
7275
; CHECK-NEXT: rts
73-
; CHECK-NEXT: .LBB2_1: ; %normal
76+
; CHECK-NEXT: .LBB3_1: ; %normal
7477
; CHECK-NEXT: move.l %d0, (4,%sp)
7578
; CHECK-NEXT: lea (ok,%pc), %a0
7679
; CHECK-NEXT: move.l %a0, (%sp)
@@ -100,15 +103,15 @@ define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
100103
; CHECK-NEXT: muls.l %d1, %d0
101104
; CHECK-NEXT: svs %d1
102105
; CHECK-NEXT: sub.b #1, %d1
103-
; CHECK-NEXT: bne .LBB3_2
106+
; CHECK-NEXT: bne .LBB4_2
104107
; CHECK-NEXT: ; %bb.1: ; %overflow
105108
; CHECK-NEXT: lea (no,%pc), %a0
106109
; CHECK-NEXT: move.l %a0, (%sp)
107110
; CHECK-NEXT: jsr printf@PLT
108111
; CHECK-NEXT: move.b #0, %d0
109112
; CHECK-NEXT: adda.l #12, %sp
110113
; CHECK-NEXT: rts
111-
; CHECK-NEXT: .LBB3_2: ; %normal
114+
; CHECK-NEXT: .LBB4_2: ; %normal
112115
; CHECK-NEXT: move.l %d0, (4,%sp)
113116
; CHECK-NEXT: lea (ok,%pc), %a0
114117
; CHECK-NEXT: move.l %a0, (%sp)

llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,28 @@
44
define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
55
; CHECK-LABEL: umul_i8:
66
; CHECK: ; %bb.0: ; %entry
7-
; CHECK-NEXT: move.b (11,%sp), %d1
8-
; CHECK-NEXT: and.l #255, %d1
9-
; CHECK-NEXT: move.b (7,%sp), %d0
7+
; CHECK-NEXT: move.b (11,%sp), %d0
108
; CHECK-NEXT: and.l #255, %d0
11-
; CHECK-NEXT: muls %d1, %d0
12-
; CHECK-NEXT: move.b #0, %d1
13-
; CHECK-NEXT: move.w %d1, %ccr
14-
; CHECK-NEXT: bvs .LBB0_2
15-
; CHECK-NEXT: ; %bb.1: ; %entry
16-
; CHECK-NEXT: move.b #42, %d0
17-
; CHECK-NEXT: .LBB0_2: ; %entry
9+
; CHECK-NEXT: move.b (7,%sp), %d1
10+
; CHECK-NEXT: and.l #255, %d1
11+
; CHECK-NEXT: muls %d0, %d1
12+
; CHECK-NEXT: move.l %d1, %d0
13+
; CHECK-NEXT: and.l #65535, %d0
1814
; CHECK-NEXT: and.l #255, %d0
1915
; CHECK-NEXT: rts
16+
entry:
17+
%umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
18+
%cmp = extractvalue { i8, i1 } %umul, 1
19+
%umul.result = extractvalue { i8, i1 } %umul, 0
20+
%X = select i1 %cmp, i8 42, i8 %umul.result
21+
ret i8 %X
22+
}
23+
24+
define zeroext i8 @umul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
25+
; CHECK-LABEL: umul_i8_no_ovf:
26+
; CHECK: ; %bb.0: ; %entry
27+
; CHECK-NEXT: move.l #42, %d0
28+
; CHECK-NEXT: rts
2029
entry:
2130
%umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
2231
%cmp = extractvalue { i8, i1 } %umul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
3342
; CHECK-NEXT: move.w (6,%sp), %d0
3443
; CHECK-NEXT: move.w (10,%sp), %d1
3544
; CHECK-NEXT: muls %d1, %d0
36-
; CHECK-NEXT: move.b #0, %d1
37-
; CHECK-NEXT: move.w %d1, %ccr
38-
; CHECK-NEXT: bvs .LBB1_2
39-
; CHECK-NEXT: ; %bb.1: ; %entry
40-
; CHECK-NEXT: move.w #42, %d0
41-
; CHECK-NEXT: .LBB1_2: ; %entry
4245
; CHECK-NEXT: and.l #65535, %d0
4346
; CHECK-NEXT: rts
4447
entry:
4548
%umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
4649
%cmp = extractvalue { i16, i1 } %umul, 1
4750
%umul.result = extractvalue { i16, i1 } %umul, 0
48-
%X = select i1 %cmp, i16 %umul.result, i16 42
51+
%X = select i1 %cmp, i16 42, i16 %umul.result
4952
ret i16 %X
5053
}
5154

0 commit comments

Comments
 (0)