[M68k] Optimize for overflow arithmetics that will never overflow

mshockwave · mshockwave · commit 2476e2a91140 · 2023-12-26T20:55:23.000-08:00
We lower overflow arithmetics to their M68kISD counterparts that produce
results of {i16/i32, i8} in which the second result represents CCR. In
the event where we're certain there won't be an overflow, for instance
8 & 16-bit multiplications, we simply use zero in replacement of the
second result.
This patch replaces M68kISD::CMOV that takes this kind of zero or
all-ones CCR as condition value with its corresponding operand value.
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -1637,7 +1637,7 @@ SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   if (isa<ConstantSDNode>(CCR)) {
     // It's likely a result of operations that will not overflow
     // hence no setcc is needed.
-    Overflow = DAG.getZExtOrTrunc(CCR, DL, N->getValueType(1));
+    Overflow = CCR;
   } else {
     // Generate a M68kISD::SETCC.
     Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1),
@@ -2406,6 +2406,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // Simple optimization when Cond is a constant to avoid generating
+  // M68kISD::CMOV if possible.
+  // TODO: Generalize this to use SelectionDAG::computeKnownBits.
+  if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) {
+    const APInt &C = Const->getAPIntValue();
+    if (C.countr_zero() >= 5)
+      return Op2;
+    else if (C.countr_one() >= 5)
+      return Op1;
+  }
+
   // M68kISD::CMOV means set the result (which is operand 1) to the RHS if
   // condition is true.
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
diff --git a/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/smul-with-overflow.ll
@@ -4,19 +4,28 @@
 define zeroext i8 @smul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
 ; CHECK-LABEL: smul_i8:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    move.b (11,%sp), %d1
-; CHECK-NEXT:    and.l #255, %d1
-; CHECK-NEXT:    move.b (7,%sp), %d0
+; CHECK-NEXT:    move.b (11,%sp), %d0
 ; CHECK-NEXT:    and.l #255, %d0
-; CHECK-NEXT:    muls %d1, %d0
-; CHECK-NEXT:    move.b #0, %d1
-; CHECK-NEXT:    move.w %d1, %ccr
-; CHECK-NEXT:    bvs .LBB0_2
-; CHECK-NEXT:  ; %bb.1: ; %entry
-; CHECK-NEXT:    move.b #42, %d0
-; CHECK-NEXT:  .LBB0_2: ; %entry
+; CHECK-NEXT:    move.b (7,%sp), %d1
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    muls %d0, %d1
+; CHECK-NEXT:    move.l %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
 ; CHECK-NEXT:    and.l #255, %d0
 ; CHECK-NEXT:    rts
+entry:
+  %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %smul, 1
+  %smul.result = extractvalue { i8, i1 } %smul, 0
+  %X = select i1 %cmp, i8 42, i8 %smul.result
+  ret i8 %X
+}
+
+define zeroext i8 @smul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: smul_i8_no_ovf:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.l #42, %d0
+; CHECK-NEXT:    rts
 entry:
   %smul = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
   %cmp = extractvalue { i8, i1 } %smul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @smul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
 ; CHECK-NEXT:    move.w (6,%sp), %d0
 ; CHECK-NEXT:    move.w (10,%sp), %d1
 ; CHECK-NEXT:    muls %d1, %d0
-; CHECK-NEXT:    move.b #0, %d1
-; CHECK-NEXT:    move.w %d1, %ccr
-; CHECK-NEXT:    bvs .LBB1_2
-; CHECK-NEXT:  ; %bb.1: ; %entry
-; CHECK-NEXT:    move.w #42, %d0
-; CHECK-NEXT:  .LBB1_2: ; %entry
 ; CHECK-NEXT:    and.l #65535, %d0
 ; CHECK-NEXT:    rts
 entry:
   %smul = tail call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
   %cmp = extractvalue { i16, i1 } %smul, 1
   %smul.result = extractvalue { i16, i1 } %smul, 0
-  %X = select i1 %cmp, i16 %smul.result, i16 42
+  %X = select i1 %cmp, i16 42, i16 %smul.result
   ret i16 %X
 }
 
@@ -62,15 +65,15 @@ define fastcc i1 @test1(i32 %v1, i32 %v2) nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    suba.l #12, %sp
 ; CHECK-NEXT:    muls.l %d1, %d0
-; CHECK-NEXT:    bvc .LBB2_1
+; CHECK-NEXT:    bvc .LBB3_1
 ; CHECK-NEXT:  ; %bb.2: ; %overflow
 ; CHECK-NEXT:    lea (no,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #0, %d0
 ; CHECK-NEXT:    adda.l #12, %sp
 ; CHECK-NEXT:    rts
-; CHECK-NEXT:  .LBB2_1: ; %normal
+; CHECK-NEXT:  .LBB3_1: ; %normal
 ; CHECK-NEXT:    move.l %d0, (4,%sp)
 ; CHECK-NEXT:    lea (ok,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
@@ -100,15 +103,15 @@ define fastcc i1 @test2(i32 %v1, i32 %v2) nounwind {
 ; CHECK-NEXT:    muls.l %d1, %d0
 ; CHECK-NEXT:    svs %d1
 ; CHECK-NEXT:    sub.b #1, %d1
-; CHECK-NEXT:    bne .LBB3_2
+; CHECK-NEXT:    bne .LBB4_2
 ; CHECK-NEXT:  ; %bb.1: ; %overflow
 ; CHECK-NEXT:    lea (no,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
 ; CHECK-NEXT:    jsr printf@PLT
 ; CHECK-NEXT:    move.b #0, %d0
 ; CHECK-NEXT:    adda.l #12, %sp
 ; CHECK-NEXT:    rts
-; CHECK-NEXT:  .LBB3_2: ; %normal
+; CHECK-NEXT:  .LBB4_2: ; %normal
 ; CHECK-NEXT:    move.l %d0, (4,%sp)
 ; CHECK-NEXT:    lea (ok,%pc), %a0
 ; CHECK-NEXT:    move.l %a0, (%sp)
diff --git a/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll b/llvm/test/CodeGen/M68k/Arith/umul-with-overflow.ll
@@ -4,19 +4,28 @@
 define zeroext i8 @umul_i8(i8 signext %a, i8 signext %b) nounwind ssp {
 ; CHECK-LABEL: umul_i8:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    move.b (11,%sp), %d1
-; CHECK-NEXT:    and.l #255, %d1
-; CHECK-NEXT:    move.b (7,%sp), %d0
+; CHECK-NEXT:    move.b (11,%sp), %d0
 ; CHECK-NEXT:    and.l #255, %d0
-; CHECK-NEXT:    muls %d1, %d0
-; CHECK-NEXT:    move.b #0, %d1
-; CHECK-NEXT:    move.w %d1, %ccr
-; CHECK-NEXT:    bvs .LBB0_2
-; CHECK-NEXT:  ; %bb.1: ; %entry
-; CHECK-NEXT:    move.b #42, %d0
-; CHECK-NEXT:  .LBB0_2: ; %entry
+; CHECK-NEXT:    move.b (7,%sp), %d1
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    muls %d0, %d1
+; CHECK-NEXT:    move.l %d1, %d0
+; CHECK-NEXT:    and.l #65535, %d0
 ; CHECK-NEXT:    and.l #255, %d0
 ; CHECK-NEXT:    rts
+entry:
+  %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
+  %cmp = extractvalue { i8, i1 } %umul, 1
+  %umul.result = extractvalue { i8, i1 } %umul, 0
+  %X = select i1 %cmp, i8 42, i8 %umul.result
+  ret i8 %X
+}
+
+define zeroext i8 @umul_i8_no_ovf(i8 signext %a, i8 signext %b) nounwind ssp {
+; CHECK-LABEL: umul_i8_no_ovf:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    move.l #42, %d0
+; CHECK-NEXT:    rts
 entry:
   %umul = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
   %cmp = extractvalue { i8, i1 } %umul, 1
@@ -33,19 +42,13 @@ define zeroext i16 @umul_i16(i16 signext %a, i16 signext %b) nounwind ssp {
 ; CHECK-NEXT:    move.w (6,%sp), %d0
 ; CHECK-NEXT:    move.w (10,%sp), %d1
 ; CHECK-NEXT:    muls %d1, %d0
-; CHECK-NEXT:    move.b #0, %d1
-; CHECK-NEXT:    move.w %d1, %ccr
-; CHECK-NEXT:    bvs .LBB1_2
-; CHECK-NEXT:  ; %bb.1: ; %entry
-; CHECK-NEXT:    move.w #42, %d0
-; CHECK-NEXT:  .LBB1_2: ; %entry
 ; CHECK-NEXT:    and.l #65535, %d0
 ; CHECK-NEXT:    rts
 entry:
   %umul = tail call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
   %cmp = extractvalue { i16, i1 } %umul, 1
   %umul.result = extractvalue { i16, i1 } %umul, 0
-  %X = select i1 %cmp, i16 %umul.result, i16 42
+  %X = select i1 %cmp, i16 42, i16 %umul.result
   ret i16 %X
 }