-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[ARM] Set operation action for UMULO and SMULO as Custom if not Thumb1 #154253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: We should specify a custom lowering for SMULO and UMULO, as is already done for AArch64, but only when the subtarget is not Thumb1. Patch is 33.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154253.diff 6 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 830156359e9e8..c15039220cc7f 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -986,6 +986,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ if (!Subtarget->isThumb1Only()) {
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
if (Subtarget->hasDSP()) {
@@ -4879,7 +4884,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -4890,7 +4895,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMPZ, dl, FlagsVT, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -4901,28 +4906,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-SDValue
-ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
- // Let legalize expand this if it isn't a legal type yet.
- if (!isTypeLegal(Op.getValueType()))
- return SDValue();
-
- SDValue Value, OverflowCmp;
- SDValue ARMcc;
- std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
- SDLoc dl(Op);
- // We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
- SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
- EVT VT = Op.getValueType();
-
- SDValue Overflow =
- DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
-}
-
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
SelectionDAG &DAG) {
SDLoc DL(BoolCarry);
@@ -4947,8 +4930,7 @@ static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
DAG.getConstant(0, DL, MVT::i32), Flags);
}
-SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue ARMTargetLowering::LowerALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!isTypeLegal(Op.getValueType()))
return SDValue();
@@ -4962,14 +4944,12 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
SDValue Value;
SDValue Overflow;
switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown overflow instruction!");
case ISD::UADDO:
Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
break;
- case ISD::USUBO: {
+ case ISD::USUBO:
Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
// Convert the carry flag into a boolean value.
Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
@@ -4978,6 +4958,57 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(1, dl, MVT::i32), Overflow);
break;
+ case ISD::UMULO: {
+ // We generate a UMUL_LOHI and then check if the high word is 0.
+ Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ Value = Value.getValue(0); // We only want the low 32 bits for the result.
+ Overflow = DAG.getNode(ARMISD::SUBC, dl, VTs, Value,
+ DAG.getConstant(0, dl, MVT::i32));
+ Value = Overflow.getValue(0);
+ SDValue Flags = Overflow.getValue(1);
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Overflow,
+ DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ case ISD::SMULO: {
+ // We generate a SMUL_LOHI and then check if all the bits of the high word
+ // are the same as the sign bit of the low word.
+ Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue HighWord = Value.getValue(1);
+ SDValue LowWord = Value.getValue(0);
+ Value = LowWord; // We only want the low 32 bits for the result.
+
+ // Compute sign bit of low word: LowWord >> 31
+ SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, LowWord,
+ DAG.getConstant(31, dl, MVT::i32));
+
+ // Subtract sign bit from high word: HighWord - SignBit
+ Overflow = DAG.getNode(ARMISD::SUBC, dl, VTs, HighWord, SignBit);
+ SDValue Flags = Overflow.getValue(1);
+
+ ARMCC::CondCodes NECond = ARMCC::NE;
+ SDValue NECondValue = DAG.getConstant(NECond, dl, MVT::i32);
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Overflow,
+ DAG.getConstant(1, dl, MVT::i32), NECondValue, Flags);
+ break;
+ }
+ default: {
+ // Handle other operations with getARMXALUOOp
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
+
+ Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
+ break;
}
}
@@ -5045,9 +5076,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
unsigned Opc = Cond.getOpcode();
+ bool OptimizeMul =
+ (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only();
if (Cond.getResNo() == 1 &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO)) {
+ Opc == ISD::USUBO || OptimizeMul)) {
if (!isTypeLegal(Cond->getValueType(0)))
return SDValue();
@@ -10653,12 +10686,13 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDO_CARRY:
case ISD::USUBO_CARRY:
return LowerUADDSUBO_CARRY(Op, DAG);
- case ISD::SADDO:
- case ISD::SSUBO:
- return LowerSignedALUO(Op, DAG);
case ISD::UADDO:
case ISD::USUBO:
- return LowerUnsignedALUO(Op, DAG);
+ case ISD::UMULO:
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::SMULO:
+ return LowerALUO(Op, DAG);
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 778595e93f84a..3e710f6d60a12 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -866,8 +866,7 @@ class VectorType;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index 48d6ee925d939..3f186f69c5aa3 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -440,17 +440,15 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
define i1 @test_overflow_recombine(i32 %in1, i32 %in2) {
; CHECK-LABEL: test_overflow_recombine:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mul r2, r0, r1
-; CHECK-NEXT: smmul r0, r0, r1
-; CHECK-NEXT: subs r0, r0, r2, asr #31
+; CHECK-NEXT: smull r0, r1, r0, r1
+; CHECK-NEXT: subs r0, r1, r0, asr #31
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
;
; CHECK-NEON-LABEL: test_overflow_recombine:
; CHECK-NEON: @ %bb.0:
-; CHECK-NEON-NEXT: mul r2, r0, r1
-; CHECK-NEON-NEXT: smmul r0, r0, r1
-; CHECK-NEON-NEXT: subs.w r0, r0, r2, asr #31
+; CHECK-NEON-NEXT: smull r0, r1, r0, r1
+; CHECK-NEON-NEXT: subs.w r0, r1, r0, asr #31
; CHECK-NEON-NEXT: it ne
; CHECK-NEON-NEXT: movne r0, #1
; CHECK-NEON-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff..1bacd3258f698 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -6,210 +6,201 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; ARMV6-LABEL: muloti_test:
; ARMV6: @ %bb.0: @ %start
; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV6-NEXT: sub sp, sp, #28
-; ARMV6-NEXT: ldr r4, [sp, #72]
-; ARMV6-NEXT: mov r7, r0
-; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT: ldr r12, [sp, #64]
-; ARMV6-NEXT: umull r1, r0, r2, r4
+; ARMV6-NEXT: sub sp, sp, #20
+; ARMV6-NEXT: ldr r4, [sp, #64]
+; ARMV6-NEXT: mov lr, r0
; ARMV6-NEXT: ldr r5, [sp, #68]
-; ARMV6-NEXT: str r1, [r7]
-; ARMV6-NEXT: ldr r1, [sp, #76]
-; ARMV6-NEXT: umull r7, r6, r1, r12
-; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT: umull r6, r9, r5, r4
-; ARMV6-NEXT: add r7, r6, r7
-; ARMV6-NEXT: umull r4, r6, r12, r4
-; ARMV6-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: mov r4, #0
-; ARMV6-NEXT: adds r8, r6, r7
-; ARMV6-NEXT: ldr r6, [sp, #80]
-; ARMV6-NEXT: adc r7, r4, #0
-; ARMV6-NEXT: ldr r4, [sp, #84]
-; ARMV6-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT: umull r12, lr, r3, r6
-; ARMV6-NEXT: umull r11, r7, r4, r2
-; ARMV6-NEXT: add r12, r11, r12
-; ARMV6-NEXT: umull r11, r10, r6, r2
-; ARMV6-NEXT: adds r12, r10, r12
-; ARMV6-NEXT: mov r10, #0
-; ARMV6-NEXT: adc r6, r10, #0
-; ARMV6-NEXT: str r6, [sp, #20] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: adds r6, r6, r11
-; ARMV6-NEXT: str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT: adc r6, r8, r12
-; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #72]
-; ARMV6-NEXT: mov r12, #0
-; ARMV6-NEXT: umull r2, r8, r2, r1
-; ARMV6-NEXT: umlal r0, r12, r3, r6
-; ARMV6-NEXT: adds r0, r2, r0
-; ARMV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT: adcs r8, r12, r8
-; ARMV6-NEXT: adc r12, r10, #0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: str r0, [r2, #4]
-; ARMV6-NEXT: movne lr, #1
-; ARMV6-NEXT: ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT: cmp r7, #0
-; ARMV6-NEXT: movne r7, #1
-; ARMV6-NEXT: ldr r0, [sp, #64]
-; ARMV6-NEXT: cmp r11, #0
-; ARMV6-NEXT: umlal r8, r12, r3, r1
+; ARMV6-NEXT: umull r1, r12, r2, r4
+; ARMV6-NEXT: str r1, [r0]
+; ARMV6-NEXT: ldr r1, [sp, #60]
+; ARMV6-NEXT: mul r7, r1, r4
+; ARMV6-NEXT: subs r6, r7, #0
+; ARMV6-NEXT: mov r0, r6
+; ARMV6-NEXT: movne r0, #1
+; ARMV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp, #56]
+; ARMV6-NEXT: mul r7, r5, r0
+; ARMV6-NEXT: subs r11, r7, #0
+; ARMV6-NEXT: umull r0, r7, r0, r4
+; ARMV6-NEXT: add r6, r6, r11
; ARMV6-NEXT: movne r11, #1
-; ARMV6-NEXT: cmp r9, #0
-; ARMV6-NEXT: movne r9, #1
-; ARMV6-NEXT: orrs r10, r0, r5
-; ARMV6-NEXT: ldr r0, [sp, #80]
+; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: adds r4, r7, r6
+; ARMV6-NEXT: ldr r6, [sp, #76]
+; ARMV6-NEXT: adc r0, r0, #0
+; ARMV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp, #72]
+; ARMV6-NEXT: mul r7, r6, r2
+; ARMV6-NEXT: str r4, [sp] @ 4-byte Spill
+; ARMV6-NEXT: mov r4, #0
+; ARMV6-NEXT: subs r9, r7, #0
+; ARMV6-NEXT: mul r7, r3, r0
+; ARMV6-NEXT: umull r0, r8, r0, r2
+; ARMV6-NEXT: mov r10, r9
; ARMV6-NEXT: movne r10, #1
-; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT: orrs r0, r0, r4
-; ARMV6-NEXT: movne r0, #1
-; ARMV6-NEXT: cmp r4, #0
+; ARMV6-NEXT: subs r7, r7, #0
+; ARMV6-NEXT: add r9, r9, r7
+; ARMV6-NEXT: movne r7, #1
+; ARMV6-NEXT: adds r9, r8, r9
+; ARMV6-NEXT: adc r8, r4, #0
+; ARMV6-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: str r8, [sp, #8] @ 4-byte Spill
+; ARMV6-NEXT: adds r0, r4, r0
+; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMV6-NEXT: ldr r4, [sp, #64]
+; ARMV6-NEXT: adc r8, r0, r9
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: umull r2, r9, r2, r5
+; ARMV6-NEXT: umlal r12, r0, r3, r4
+; ARMV6-NEXT: adds r2, r2, r12
+; ARMV6-NEXT: str r2, [lr, #4]
+; ARMV6-NEXT: adcs r0, r0, r9
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: adc r9, r2, #0
+; ARMV6-NEXT: ldr r2, [sp, #56]
+; ARMV6-NEXT: umlal r0, r9, r3, r5
+; ARMV6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: orrs r4, r2, r1
+; ARMV6-NEXT: ldr r2, [sp, #72]
; ARMV6-NEXT: movne r4, #1
+; ARMV6-NEXT: orrs r2, r2, r6
+; ARMV6-NEXT: movne r2, #1
+; ARMV6-NEXT: cmp r6, #0
+; ARMV6-NEXT: movne r6, #1
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
-; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: cmp r1, #0
; ARMV6-NEXT: movne r1, #1
-; ARMV6-NEXT: adds r6, r8, r6
-; ARMV6-NEXT: str r6, [r2, #8]
-; ARMV6-NEXT: and r1, r5, r1
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r9
+; ARMV6-NEXT: cmp r5, #0
+; ARMV6-NEXT: movne r5, #1
+; ARMV6-NEXT: adds r0, r0, r12
+; ARMV6-NEXT: str r0, [lr, #8]
+; ARMV6-NEXT: adcs r0, r9, r8
+; ARMV6-NEXT: str r0, [lr, #12]
+; ARMV6-NEXT: and r0, r4, r2
+; ARMV6-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT: and r1, r1, r5
+; ARMV6-NEXT: orr r1, r1, r2
+; ARMV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; ARMV6-NEXT: orr r1, r1, r11
-; ARMV6-NEXT: and r0, r10, r0
-; ARMV6-NEXT: adcs r6, r12, r6
-; ARMV6-NEXT: str r6, [r2, #12]
-; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r6
+; ARMV6-NEXT: orr r1, r1, r2
+; ARMV6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: and r1, r4, r3
+; ARMV6-NEXT: and r1, r6, r3
+; ARMV6-NEXT: orr r1, r1, r10
; ARMV6-NEXT: orr r1, r1, r7
-; ARMV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, lr
-; ARMV6-NEXT: orr r1, r1, r3
+; ARMV6-NEXT: orr r1, r1, r2
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: mov r1, #0
; ARMV6-NEXT: adc r1, r1, #0
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: and r0, r0, #1
-; ARMV6-NEXT: strb r0, [r2, #16]
-; ARMV6-NEXT: add sp, sp, #28
+; ARMV6-NEXT: strb r0, [lr, #16]
+; ARMV6-NEXT: add sp, sp, #20
; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; ARMV7-LABEL: muloti_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT: sub sp, sp, #44
+; ARMV7-NEXT: sub sp, sp, #36
+; ARMV7-NEXT: ldr r5, [sp, #84]
+; ARMV7-NEXT: mov r12, r0
+; ARMV7-NEXT: ldr r1, [sp, #80]
+; ARMV7-NEXT: ldr r10, [sp, #72]
+; ARMV7-NEXT: umull r7, r6, r2, r5
+; ARMV7-NEXT: ldr r4, [sp, #76]
; ARMV7-NEXT: ldr r8, [sp, #88]
-; ARMV7-NEXT: mov r9, r0
-; ARMV7-NEXT: ldr r7, [sp, #96]
-; ARMV7-NEXT: ldr lr, [sp, #100]
-; ARMV7-NEXT: umull r0, r5, r2, r8
-; ARMV7-NEXT: ldr r4, [sp, #80]
-; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: umull r1, r0, r3, r7
-; ARMV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r11, lr, r2
-; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: ldr r1, [sp, #92]
-; ARMV7-NEXT: str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r10, r7, r2
-; ARMV7-NEXT: mov r7, r1
-; ARMV7-NEXT: umull r6, r12, r1, r4
-; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT: ldr r0, [sp, #84]
-; ARMV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r1, r0, r8
+; ARMV7-NEXT: umull lr, r0, r2, r1
; ARMV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r2, r2, r7
-; ARMV7-NEXT: mov r7, r4
-; ARMV7-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV7-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT: umull r2, r6, r4, r8
-; ARMV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
; ARMV7-NEXT: mov r6, #0
-; ARMV7-NEXT: str r2, [r9]
-; ARMV7-NEXT: umlal r5, r6, r3, r8
-; ARMV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT: add r4, r4, r2
-; ARMV7-NEXT: adds r2, r10, r4
-; ARMV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adc r2, r2, #0
-; ARMV7-NEXT: cmp r12, #0
-; ARMV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: movwne r12, #1
-; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: ldr r2, [sp, #96]
-; ARMV7-NEXT: movwne r1, #1
-; ARMV7-NEXT: orrs r10, r7, r0
-; ARMV7-NEXT: movwne r10, #1
-; ARMV7-NEXT: orrs r7, r2, lr
-; ARMV7-NEXT: ldr r2, [sp, #92]
+; ARMV7-NEXT: mul r9, r4, r1
+; ARMV7-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT: umlal r0, r6, r3, r1
+; ARMV7-NEXT: umull r1, r7, r10, r1
+; ARMV7-NEXT: mul r11, r5, r10
+; ARMV7-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; ARMV7-NEXT: mul r1, r3, r8
+; ARMV7-NEXT: str r7, [sp, #12] @ 4-byte Spill
+; ARMV7-NEXT: umull r7, r10, r8, r2
+; ARMV7-NEXT: subs r5, r11, #0
+; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; ARMV7-NEXT: ldr r1, [sp, #92]
+; ARMV7-NEXT: str r10, [sp, #24] @ 4-byte Spill
+; ARMV7-NEXT: str r7, [sp, #28] @ 4-byte Spill
+; ARMV7-NEXT: mul r2, r1, r2
+; ARMV7-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: str lr, [r12]
+; ARMV7-NEXT: str r5, [sp] @ 4-byte Spill
+; ARMV7-NEXT: movwne r5, #1
+; ARMV7-NEXT: subs lr, r9, #0
+; ARMV7-NEXT: ldr r2, [sp, #72]
+; ARMV7-NEXT: ldr r9, [sp, #84]
+; ARMV7-NEXT: mov r7, lr
+; ARMV7-NEXT: ldr r11, [sp, #4] @ 4-byte Reload
; ARMV7-NEXT: movwne r7, #1
-; ARMV7-NEXT: cmp r0, #0
-; ARMV7-NEXT: movwne r0, #1
-; ARMV7-NEXT: cmp r2, #0
-; ARMV7-NEXT: mov r4, r2
-; ARMV7-NEXT: mov r8, r2
-; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: and r0, r0, r4
-; ARMV7-NEXT: mov r4, #0
-; ARMV7-NEXT: adds r5, r2, r5
-; ARMV7-NEXT: str r5, [r9, #4]
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT: and r5, r10, r7
-; ARMV7-NEXT: orr r0, r0, r12
-; ARMV7-NEXT: mov r12, #0
-; ARMV7-NEXT: add r1, r2, r1
-; ARMV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT: adcs r2, r6, r2
-; ARMV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT: adc r7, r4, #0
-; ARMV7-NEXT: adds r1, r6, r1
...
[truncated]
|
a1116f2
to
87edae9
Compare
✅ With the latest revision this PR passed the C/C++ code formatter.
79f7844
to
02a54d1
Compare
We should specify a custom lowering for SMULO and UMULO, as is already done for AArch64, but only when the subtarget is not Thumb1. Properly lower UMULO and SMULO when not targeting Thumb1.
@davemgreen ping
We should specify a custom lowering for SMULO and UMULO, as is already done for AArch64, but only when the subtarget is not Thumb1.