Skip to content

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented Aug 19, 2025

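We should specify a custom lowering for SMULO and UMULO, as we already do for AArch64, but only when the subtarget is not Thumb1-only: the lowering is built on UMUL_LOHI/SMUL_LOHI, and Thumb1 has no long-multiply instruction to select them to.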

@llvmbot
Copy link
Member

llvmbot commented Aug 19, 2025

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-backend-arm

Author: AZero13 (AZero13)

Changes

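We should specify a custom lowering for SMULO and UMULO, as we already do for AArch64, but only when the subtarget is not Thumb1-only: the lowering is built on UMUL_LOHI/SMUL_LOHI, and Thumb1 has no long-multiply instruction to select them to.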


Patch is 33.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154253.diff

6 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+68-34)
  • (modified) llvm/lib/Target/ARM/ARMISelLowering.h (+1-2)
  • (modified) llvm/test/CodeGen/ARM/select.ll (+4-6)
  • (modified) llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll (+163-172)
  • (modified) llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll (+84-96)
  • (modified) llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll (+17-16)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..c15039220cc7f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -986,6 +986,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
   setOperationAction(ISD::USUBO, MVT::i32, Custom);
 
+  if (!Subtarget->isThumb1Only()) {
+    setOperationAction(ISD::UMULO, MVT::i32, Custom);
+    setOperationAction(ISD::SMULO, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
   setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
   if (Subtarget->hasDSP()) {
@@ -4879,7 +4884,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::UMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
                               DAG.getConstant(0, dl, MVT::i32));
     Value = Value.getValue(0); // We only want the low 32 bits for the result.
     break;
@@ -4890,7 +4895,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     Value = DAG.getNode(ISD::SMUL_LOHI, dl,
                         DAG.getVTList(Op.getValueType(), Op.getValueType()),
                         LHS, RHS);
-    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+    OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
                               DAG.getNode(ISD::SRA, dl, Op.getValueType(),
                                           Value.getValue(0),
                                           DAG.getConstant(31, dl, MVT::i32)));
@@ -4901,28 +4906,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
   return std::make_pair(Value, OverflowCmp);
 }
 
-SDValue
-ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
-  // Let legalize expand this if it isn't a legal type yet.
-  if (!isTypeLegal(Op.getValueType()))
-    return SDValue();
-
-  SDValue Value, OverflowCmp;
-  SDValue ARMcc;
-  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
-  SDLoc dl(Op);
-  // We use 0 and 1 as false and true values.
-  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
-  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
-  EVT VT = Op.getValueType();
-
-  SDValue Overflow =
-      DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
-
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
-}
-
 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
                                               SelectionDAG &DAG) {
   SDLoc DL(BoolCarry);
@@ -4947,8 +4930,7 @@ static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
                      DAG.getConstant(0, DL, MVT::i32), Flags);
 }
 
-SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
-                                             SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerALUO(SDValue Op, SelectionDAG &DAG) const {
   // Let legalize expand this if it isn't a legal type yet.
   if (!isTypeLegal(Op.getValueType()))
     return SDValue();
@@ -4962,14 +4944,12 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
   SDValue Value;
   SDValue Overflow;
   switch (Op.getOpcode()) {
-  default:
-    llvm_unreachable("Unknown overflow instruction!");
   case ISD::UADDO:
     Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
     // Convert the carry flag into a boolean value.
     Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
     break;
-  case ISD::USUBO: {
+  case ISD::USUBO:
     Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
     // Convert the carry flag into a boolean value.
     Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
@@ -4978,6 +4958,57 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
     Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
                            DAG.getConstant(1, dl, MVT::i32), Overflow);
     break;
+  case ISD::UMULO: {
+    // We generate a UMUL_LOHI and then check if the high word is 0.
+    Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+    Value = Value.getValue(0); // We only want the low 32 bits for the result.
+    Overflow = DAG.getNode(ARMISD::SUBC, dl, VTs, Value,
+                           DAG.getConstant(0, dl, MVT::i32));
+    Value = Overflow.getValue(0);
+    SDValue Flags = Overflow.getValue(1);
+
+    ARMCC::CondCodes NECond = ARMCC::NE;
+    SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Overflow,
+                    DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+    break;
+  }
+  case ISD::SMULO: {
+    // We generate a SMUL_LOHI and then check if all the bits of the high word
+    // are the same as the sign bit of the low word.
+    Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+    SDValue HighWord = Value.getValue(1);
+    SDValue LowWord = Value.getValue(0);
+    Value = LowWord; // We only want the low 32 bits for the result.
+
+    // Compute sign bit of low word: LowWord >> 31
+    SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, LowWord,
+                                  DAG.getConstant(31, dl, MVT::i32));
+
+    // Subtract sign bit from high word: HighWord - SignBit
+    Overflow = DAG.getNode(ARMISD::SUBC, dl, VTs, HighWord, SignBit);
+    SDValue Flags = Overflow.getValue(1);
+
+    ARMCC::CondCodes NECond = ARMCC::NE;
+    SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Overflow,
+                    DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+    break;
+  }
+  default: {
+    // Handle other operations with getARMXALUOOp
+    SDValue Value, OverflowCmp;
+    SDValue ARMcc;
+    std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+    // We use 0 and 1 as false and true values.
+    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+
+    Overflow =
+        DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
+    break;
   }
   }
 
@@ -5045,9 +5076,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   unsigned Opc = Cond.getOpcode();
 
+  bool OptimizeMul =
+      (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only();
   if (Cond.getResNo() == 1 &&
       (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO)) {
+       Opc == ISD::USUBO || OptimizeMul)) {
     if (!isTypeLegal(Cond->getValueType(0)))
       return SDValue();
 
@@ -10653,12 +10686,13 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UADDO_CARRY:
   case ISD::USUBO_CARRY:
     return LowerUADDSUBO_CARRY(Op, DAG);
-  case ISD::SADDO:
-  case ISD::SSUBO:
-    return LowerSignedALUO(Op, DAG);
   case ISD::UADDO:
   case ISD::USUBO:
-    return LowerUnsignedALUO(Op, DAG);
+  case ISD::UMULO:
+  case ISD::SADDO:
+  case ISD::SSUBO:
+  case ISD::SMULO:
+    return LowerALUO(Op, DAG);
   case ISD::SADDSAT:
   case ISD::SSUBSAT:
   case ISD::UADDSAT:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..3e710f6d60a12 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -866,8 +866,7 @@ class VectorType;
     SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939..3f186f69c5aa3 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -440,17 +440,15 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
 define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
 ; CHECK-LABEL: test_overflow_recombine:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    mul r2, r0, r1
-; CHECK-NEXT:    smmul r0, r0, r1
-; CHECK-NEXT:    subs r0, r0, r2, asr #31
+; CHECK-NEXT:    smull r0, r1, r0, r1
+; CHECK-NEXT:    subs r0, r1, r0, asr #31
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
 ;
 ; CHECK-NEON-LABEL: test_overflow_recombine:
 ; CHECK-NEON:       @ %bb.0:
-; CHECK-NEON-NEXT:    mul r2, r0, r1
-; CHECK-NEON-NEXT:    smmul r0, r0, r1
-; CHECK-NEON-NEXT:    subs.w r0, r0, r2, asr #31
+; CHECK-NEON-NEXT:    smull r0, r1, r0, r1
+; CHECK-NEON-NEXT:    subs.w r0, r1, r0, asr #31
 ; CHECK-NEON-NEXT:    it ne
 ; CHECK-NEON-NEXT:    movne r0, #1
 ; CHECK-NEON-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff..1bacd3258f698 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -6,210 +6,201 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
 ; ARMV6-LABEL: muloti_test:
 ; ARMV6:       @ %bb.0: @ %start
 ; ARMV6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV6-NEXT:    sub sp, sp, #28
-; ARMV6-NEXT:    ldr r4, [sp, #72]
-; ARMV6-NEXT:    mov r7, r0
-; ARMV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r12, [sp, #64]
-; ARMV6-NEXT:    umull r1, r0, r2, r4
+; ARMV6-NEXT:    sub sp, sp, #20
+; ARMV6-NEXT:    ldr r4, [sp, #64]
+; ARMV6-NEXT:    mov lr, r0
 ; ARMV6-NEXT:    ldr r5, [sp, #68]
-; ARMV6-NEXT:    str r1, [r7]
-; ARMV6-NEXT:    ldr r1, [sp, #76]
-; ARMV6-NEXT:    umull r7, r6, r1, r12
-; ARMV6-NEXT:    str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT:    umull r6, r9, r5, r4
-; ARMV6-NEXT:    add r7, r6, r7
-; ARMV6-NEXT:    umull r4, r6, r12, r4
-; ARMV6-NEXT:    str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT:    mov r4, #0
-; ARMV6-NEXT:    adds r8, r6, r7
-; ARMV6-NEXT:    ldr r6, [sp, #80]
-; ARMV6-NEXT:    adc r7, r4, #0
-; ARMV6-NEXT:    ldr r4, [sp, #84]
-; ARMV6-NEXT:    str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT:    umull r12, lr, r3, r6
-; ARMV6-NEXT:    umull r11, r7, r4, r2
-; ARMV6-NEXT:    add r12, r11, r12
-; ARMV6-NEXT:    umull r11, r10, r6, r2
-; ARMV6-NEXT:    adds r12, r10, r12
-; ARMV6-NEXT:    mov r10, #0
-; ARMV6-NEXT:    adc r6, r10, #0
-; ARMV6-NEXT:    str r6, [sp, #20] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT:    adds r6, r6, r11
-; ARMV6-NEXT:    str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT:    adc r6, r8, r12
-; ARMV6-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT:    ldr r6, [sp, #72]
-; ARMV6-NEXT:    mov r12, #0
-; ARMV6-NEXT:    umull r2, r8, r2, r1
-; ARMV6-NEXT:    umlal r0, r12, r3, r6
-; ARMV6-NEXT:    adds r0, r2, r0
-; ARMV6-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT:    adcs r8, r12, r8
-; ARMV6-NEXT:    adc r12, r10, #0
-; ARMV6-NEXT:    cmp lr, #0
-; ARMV6-NEXT:    str r0, [r2, #4]
-; ARMV6-NEXT:    movne lr, #1
-; ARMV6-NEXT:    ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT:    cmp r7, #0
-; ARMV6-NEXT:    movne r7, #1
-; ARMV6-NEXT:    ldr r0, [sp, #64]
-; ARMV6-NEXT:    cmp r11, #0
-; ARMV6-NEXT:    umlal r8, r12, r3, r1
+; ARMV6-NEXT:    umull r1, r12, r2, r4
+; ARMV6-NEXT:    str r1, [r0]
+; ARMV6-NEXT:    ldr r1, [sp, #60]
+; ARMV6-NEXT:    mul r7, r1, r4
+; ARMV6-NEXT:    subs r6, r7, #0
+; ARMV6-NEXT:    mov r0, r6
+; ARMV6-NEXT:    movne r0, #1
+; ARMV6-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT:    ldr r0, [sp, #56]
+; ARMV6-NEXT:    mul r7, r5, r0
+; ARMV6-NEXT:    subs r11, r7, #0
+; ARMV6-NEXT:    umull r0, r7, r0, r4
+; ARMV6-NEXT:    add r6, r6, r11
 ; ARMV6-NEXT:    movne r11, #1
-; ARMV6-NEXT:    cmp r9, #0
-; ARMV6-NEXT:    movne r9, #1
-; ARMV6-NEXT:    orrs r10, r0, r5
-; ARMV6-NEXT:    ldr r0, [sp, #80]
+; ARMV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT:    mov r0, #0
+; ARMV6-NEXT:    adds r4, r7, r6
+; ARMV6-NEXT:    ldr r6, [sp, #76]
+; ARMV6-NEXT:    adc r0, r0, #0
+; ARMV6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT:    ldr r0, [sp, #72]
+; ARMV6-NEXT:    mul r7, r6, r2
+; ARMV6-NEXT:    str r4, [sp] @ 4-byte Spill
+; ARMV6-NEXT:    mov r4, #0
+; ARMV6-NEXT:    subs r9, r7, #0
+; ARMV6-NEXT:    mul r7, r3, r0
+; ARMV6-NEXT:    umull r0, r8, r0, r2
+; ARMV6-NEXT:    mov r10, r9
 ; ARMV6-NEXT:    movne r10, #1
-; ARMV6-NEXT:    ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT:    orrs r0, r0, r4
-; ARMV6-NEXT:    movne r0, #1
-; ARMV6-NEXT:    cmp r4, #0
+; ARMV6-NEXT:    subs r7, r7, #0
+; ARMV6-NEXT:    add r9, r9, r7
+; ARMV6-NEXT:    movne r7, #1
+; ARMV6-NEXT:    adds r9, r8, r9
+; ARMV6-NEXT:    adc r8, r4, #0
+; ARMV6-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT:    str r8, [sp, #8] @ 4-byte Spill
+; ARMV6-NEXT:    adds r0, r4, r0
+; ARMV6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; ARMV6-NEXT:    ldr r4, [sp, #64]
+; ARMV6-NEXT:    adc r8, r0, r9
+; ARMV6-NEXT:    mov r0, #0
+; ARMV6-NEXT:    umull r2, r9, r2, r5
+; ARMV6-NEXT:    umlal r12, r0, r3, r4
+; ARMV6-NEXT:    adds r2, r2, r12
+; ARMV6-NEXT:    str r2, [lr, #4]
+; ARMV6-NEXT:    adcs r0, r0, r9
+; ARMV6-NEXT:    mov r2, #0
+; ARMV6-NEXT:    adc r9, r2, #0
+; ARMV6-NEXT:    ldr r2, [sp, #56]
+; ARMV6-NEXT:    umlal r0, r9, r3, r5
+; ARMV6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT:    orrs r4, r2, r1
+; ARMV6-NEXT:    ldr r2, [sp, #72]
 ; ARMV6-NEXT:    movne r4, #1
+; ARMV6-NEXT:    orrs r2, r2, r6
+; ARMV6-NEXT:    movne r2, #1
+; ARMV6-NEXT:    cmp r6, #0
+; ARMV6-NEXT:    movne r6, #1
 ; ARMV6-NEXT:    cmp r3, #0
 ; ARMV6-NEXT:    movne r3, #1
-; ARMV6-NEXT:    cmp r5, #0
-; ARMV6-NEXT:    movne r5, #1
 ; ARMV6-NEXT:    cmp r1, #0
 ; ARMV6-NEXT:    movne r1, #1
-; ARMV6-NEXT:    adds r6, r8, r6
-; ARMV6-NEXT:    str r6, [r2, #8]
-; ARMV6-NEXT:    and r1, r5, r1
-; ARMV6-NEXT:    ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT:    orr r1, r1, r9
+; ARMV6-NEXT:    cmp r5, #0
+; ARMV6-NEXT:    movne r5, #1
+; ARMV6-NEXT:    adds r0, r0, r12
+; ARMV6-NEXT:    str r0, [lr, #8]
+; ARMV6-NEXT:    adcs r0, r9, r8
+; ARMV6-NEXT:    str r0, [lr, #12]
+; ARMV6-NEXT:    and r0, r4, r2
+; ARMV6-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT:    and r1, r1, r5
+; ARMV6-NEXT:    orr r1, r1, r2
+; ARMV6-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; ARMV6-NEXT:    orr r1, r1, r11
-; ARMV6-NEXT:    and r0, r10, r0
-; ARMV6-NEXT:    adcs r6, r12, r6
-; ARMV6-NEXT:    str r6, [r2, #12]
-; ARMV6-NEXT:    ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT:    orr r1, r1, r6
+; ARMV6-NEXT:    orr r1, r1, r2
+; ARMV6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; ARMV6-NEXT:    orr r0, r0, r1
-; ARMV6-NEXT:    and r1, r4, r3
+; ARMV6-NEXT:    and r1, r6, r3
+; ARMV6-NEXT:    orr r1, r1, r10
 ; ARMV6-NEXT:    orr r1, r1, r7
-; ARMV6-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT:    orr r1, r1, lr
-; ARMV6-NEXT:    orr r1, r1, r3
+; ARMV6-NEXT:    orr r1, r1, r2
 ; ARMV6-NEXT:    orr r0, r0, r1
 ; ARMV6-NEXT:    mov r1, #0
 ; ARMV6-NEXT:    adc r1, r1, #0
 ; ARMV6-NEXT:    orr r0, r0, r1
 ; ARMV6-NEXT:    and r0, r0, #1
-; ARMV6-NEXT:    strb r0, [r2, #16]
-; ARMV6-NEXT:    add sp, sp, #28
+; ARMV6-NEXT:    strb r0, [lr, #16]
+; ARMV6-NEXT:    add sp, sp, #20
 ; ARMV6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; ARMV7-LABEL: muloti_test:
 ; ARMV7:       @ %bb.0: @ %start
 ; ARMV7-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT:    sub sp, sp, #44
+; ARMV7-NEXT:    sub sp, sp, #36
+; ARMV7-NEXT:    ldr r5, [sp, #84]
+; ARMV7-NEXT:    mov r12, r0
+; ARMV7-NEXT:    ldr r1, [sp, #80]
+; ARMV7-NEXT:    ldr r10, [sp, #72]
+; ARMV7-NEXT:    umull r7, r6, r2, r5
+; ARMV7-NEXT:    ldr r4, [sp, #76]
 ; ARMV7-NEXT:    ldr r8, [sp, #88]
-; ARMV7-NEXT:    mov r9, r0
-; ARMV7-NEXT:    ldr r7, [sp, #96]
-; ARMV7-NEXT:    ldr lr, [sp, #100]
-; ARMV7-NEXT:    umull r0, r5, r2, r8
-; ARMV7-NEXT:    ldr r4, [sp, #80]
-; ARMV7-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT:    umull r1, r0, r3, r7
-; ARMV7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT:    umull r0, r11, lr, r2
-; ARMV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r1, [sp, #92]
-; ARMV7-NEXT:    str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT:    umull r0, r10, r7, r2
-; ARMV7-NEXT:    mov r7, r1
-; ARMV7-NEXT:    umull r6, r12, r1, r4
-; ARMV7-NEXT:    str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r0, [sp, #84]
-; ARMV7-NEXT:    str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT:    umull r6, r1, r0, r8
+; ARMV7-NEXT:    umull lr, r0, r2, r1
 ; ARMV7-NEXT:    str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT:    umull r6, r2, r2, r7
-; ARMV7-NEXT:    mov r7, r4
-; ARMV7-NEXT:    str r6, [sp, #8] @ 4-byte Spill
-; ARMV7-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT:    umull r2, r6, r4, r8
-; ARMV7-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT:    str r6, [sp, #28] @ 4-byte Spill
 ; ARMV7-NEXT:    mov r6, #0
-; ARMV7-NEXT:    str r2, [r9]
-; ARMV7-NEXT:    umlal r5, r6, r3, r8
-; ARMV7-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT:    add r4, r4, r2
-; ARMV7-NEXT:    adds r2, r10, r4
-; ARMV7-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT:    mov r2, #0
-; ARMV7-NEXT:    adc r2, r2, #0
-; ARMV7-NEXT:    cmp r12, #0
-; ARMV7-NEXT:    str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT:    movwne r12, #1
-; ARMV7-NEXT:    cmp r1, #0
-; ARMV7-NEXT:    ldr r2, [sp, #96]
-; ARMV7-NEXT:    movwne r1, #1
-; ARMV7-NEXT:    orrs r10, r7, r0
-; ARMV7-NEXT:    movwne r10, #1
-; ARMV7-NEXT:    orrs r7, r2, lr
-; ARMV7-NEXT:    ldr r2, [sp, #92]
+; ARMV7-NEXT:    mul r9, r4, r1
+; ARMV7-NEXT:    str r7, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT:    umlal r0, r6, r3, r1
+; ARMV7-NEXT:    umull r1, r7, r10, r1
+; ARMV7-NEXT:    mul r11, r5, r10
+; ARMV7-NEXT:    str r1, [sp, #32] @ 4-byte Spill
+; ARMV7-NEXT:    mul r1, r3, r8
+; ARMV7-NEXT:    str r7, [sp, #12] @ 4-byte Spill
+; ARMV7-NEXT:    umull r7, r10, r8, r2
+; ARMV7-NEXT:    subs r5, r11, #0
+; ARMV7-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT:    ldr r1, [sp, #92]
+; ARMV7-NEXT:    str r10, [sp, #24] @ 4-byte Spill
+; ARMV7-NEXT:    str r7, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT:    mul r2, r1, r2
+; ARMV7-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT:    str lr, [r12]
+; ARMV7-NEXT:    str r5, [sp] @ 4-byte Spill
+; ARMV7-NEXT:    movwne r5, #1
+; ARMV7-NEXT:    subs lr, r9, #0
+; ARMV7-NEXT:    ldr r2, [sp, #72]
+; ARMV7-NEXT:    ldr r9, [sp, #84]
+; ARMV7-NEXT:    mov r7, lr
+; ARMV7-NEXT:    ldr r11, [sp, #4] @ 4-byte Reload
 ; ARMV7-NEXT:    movwne r7, #1
-; ARMV7-NEXT:    cmp r0, #0
-; ARMV7-NEXT:    movwne r0, #1
-; ARMV7-NEXT:    cmp r2, #0
-; ARMV7-NEXT:    mov r4, r2
-; ARMV7-NEXT:    mov r8, r2
-; ARMV7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; ARMV7-NEXT:    movwne r4, #1
-; ARMV7-NEXT:    and r0, r0, r4
-; ARMV7-NEXT:    mov r4, #0
-; ARMV7-NEXT:    adds r5, r2, r5
-; ARMV7-NEXT:    str r5, [r9, #4]
-; ARMV7-NEXT:    orr r0, r0, r1
-; ARMV7-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT:    and r5, r10, r7
-; ARMV7-NEXT:    orr r0, r0, r12
-; ARMV7-NEXT:    mov r12, #0
-; ARMV7-NEXT:    add r1, r2, r1
-; ARMV7-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT:    adcs r2, r6, r2
-; ARMV7-NEXT:    ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT:    adc r7, r4, #0
-; ARMV7-NEXT:    adds r1, r6, r1
...
[truncated]

@AZero13 AZero13 force-pushed the mulo branch 2 times, most recently from a1116f2 to 87edae9 Compare August 19, 2025 02:54
Copy link

github-actions bot commented Aug 19, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@AZero13 AZero13 force-pushed the mulo branch 2 times, most recently from 79f7844 to 02a54d1 Compare August 19, 2025 03:05
@llvmbot llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Aug 19, 2025
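We should specify a custom lowering for SMULO and UMULO, as we already do for AArch64, but only when the subtarget is not Thumb1-only: the lowering is built on UMUL_LOHI/SMUL_LOHI, and Thumb1 has no long-multiply instruction to select them to.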

Custom-lower UMULO and SMULO when the subtarget is not Thumb1-only.
@AZero13 AZero13 marked this pull request as draft August 19, 2025 21:03
@AZero13 AZero13 marked this pull request as ready for review August 19, 2025 21:08
@AZero13
Copy link
Contributor Author

AZero13 commented Sep 17, 2025

@davemgreen ping

@AZero13
Copy link
Contributor Author

AZero13 commented Oct 15, 2025

@arsenm

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:ARM llvm:analysis Includes value tracking, cost tables and constant folding

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants