@@ -647,6 +647,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
647
647
if (!Subtarget->hasV8_1MMainlineOps())
648
648
setOperationAction(ISD::UCMP, MVT::i32, Custom);
649
649
650
+ if (!Subtarget->isThumb1Only())
651
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
652
+
650
653
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
651
654
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
652
655
@@ -5621,6 +5624,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5621
5624
return SDValue();
5622
5625
}
5623
5626
5627
+ // Generate CMP + CMOV for integer abs.
5628
+ SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
5629
+ SDLoc DL(Op);
5630
+
5631
+ SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);
5632
+
5633
+ // Generate CMP & CMOV.
5634
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
5635
+ DAG.getConstant(0, DL, MVT::i32));
5636
+ return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
5637
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
5638
+ }
5639
+
5624
5640
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5625
5641
SDValue Chain = Op.getOperand(0);
5626
5642
SDValue Cond = Op.getOperand(1);
@@ -10703,6 +10719,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10703
10719
case ISD::UCMP:
10704
10720
case ISD::SCMP:
10705
10721
return LowerCMP(Op, DAG);
10722
+ case ISD::ABS:
10723
+ return LowerABS(Op, DAG);
10706
10724
}
10707
10725
}
10708
10726
@@ -12288,89 +12306,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12288
12306
case ARM::Int_eh_sjlj_setup_dispatch:
12289
12307
EmitSjLjDispatchBlock(MI, BB);
12290
12308
return BB;
12291
-
12292
- case ARM::ABS:
12293
- case ARM::t2ABS: {
12294
- // To insert an ABS instruction, we have to insert the
12295
- // diamond control-flow pattern. The incoming instruction knows the
12296
- // source vreg to test against 0, the destination vreg to set,
12297
- // the condition code register to branch on, the
12298
- // true/false values to select between, and a branch opcode to use.
12299
- // It transforms
12300
- // V1 = ABS V0
12301
- // into
12302
- // V2 = MOVS V0
12303
- // BCC (branch to SinkBB if V0 >= 0)
12304
- // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12305
- // SinkBB: V1 = PHI(V2, V3)
12306
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
12307
- MachineFunction::iterator BBI = ++BB->getIterator();
12308
- MachineFunction *Fn = BB->getParent();
12309
- MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12310
- MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12311
- Fn->insert(BBI, RSBBB);
12312
- Fn->insert(BBI, SinkBB);
12313
-
12314
- // Set the call frame size on entry to the new basic blocks.
12315
- unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12316
- RSBBB->setCallFrameSize(CallFrameSize);
12317
- SinkBB->setCallFrameSize(CallFrameSize);
12318
-
12319
- Register ABSSrcReg = MI.getOperand(1).getReg();
12320
- Register ABSDstReg = MI.getOperand(0).getReg();
12321
- bool ABSSrcKIll = MI.getOperand(1).isKill();
12322
- bool isThumb2 = Subtarget->isThumb2();
12323
- MachineRegisterInfo &MRI = Fn->getRegInfo();
12324
- // In Thumb mode S must not be specified if source register is the SP or
12325
- // PC and if destination register is the SP, so restrict register class
12326
- Register NewRsbDstReg = MRI.createVirtualRegister(
12327
- isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12328
-
12329
- // Transfer the remainder of BB and its successor edges to sinkMBB.
12330
- SinkBB->splice(SinkBB->begin(), BB,
12331
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
12332
- SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12333
-
12334
- BB->addSuccessor(RSBBB);
12335
- BB->addSuccessor(SinkBB);
12336
-
12337
- // fall through to SinkMBB
12338
- RSBBB->addSuccessor(SinkBB);
12339
-
12340
- // insert a cmp at the end of BB
12341
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12342
- .addReg(ABSSrcReg)
12343
- .addImm(0)
12344
- .add(predOps(ARMCC::AL));
12345
-
12346
- // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12347
- BuildMI(BB, dl,
12348
- TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12349
- .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12350
-
12351
- // insert rsbri in RSBBB
12352
- // Note: BCC and rsbri will be converted into predicated rsbmi
12353
- // by if-conversion pass
12354
- BuildMI(*RSBBB, RSBBB->begin(), dl,
12355
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12356
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12357
- .addImm(0)
12358
- .add(predOps(ARMCC::AL))
12359
- .add(condCodeOp());
12360
-
12361
- // insert PHI in SinkBB,
12362
- // reuse ABSDstReg to not change uses of ABS instruction
12363
- BuildMI(*SinkBB, SinkBB->begin(), dl,
12364
- TII->get(ARM::PHI), ABSDstReg)
12365
- .addReg(NewRsbDstReg).addMBB(RSBBB)
12366
- .addReg(ABSSrcReg).addMBB(BB);
12367
-
12368
- // remove ABS instruction
12369
- MI.eraseFromParent();
12370
-
12371
- // return last added BB
12372
- return SinkBB;
12373
- }
12374
12309
case ARM::COPY_STRUCT_BYVAL_I32:
12375
12310
++NumLoopByVals;
12376
12311
return EmitStructByval(MI, BB);
@@ -14082,6 +14017,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14082
14017
CSINC.getOperand(3));
14083
14018
}
14084
14019
14020
+ static bool isNegatedInteger(SDValue Op) {
14021
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
14022
+ }
14023
+
14024
+ // Try to fold
14025
+ //
14026
+ // (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
14027
+ //
14028
+ // The folding helps cmov to be matched with csneg without generating
14029
+ // redundant neg instruction.
14030
+ static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
14031
+ if (!isNegatedInteger(SDValue(N, 0)))
14032
+ return SDValue();
14033
+
14034
+ SDValue CMov = N->getOperand(1);
14035
+ if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
14036
+ return SDValue();
14037
+
14038
+ SDValue N0 = CMov.getOperand(0);
14039
+ SDValue N1 = CMov.getOperand(1);
14040
+
14041
+ // If neither of them are negations, it's not worth the folding as it
14042
+ // introduces two additional negations while reducing one negation.
14043
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
14044
+ return SDValue();
14045
+
14046
+ SDLoc DL(N);
14047
+ EVT VT = CMov.getValueType();
14048
+
14049
+ SDValue N0N = DAG.getNegative(N0, DL, VT);
14050
+ SDValue N1N = DAG.getNegative(N1, DL, VT);
14051
+ return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
14052
+ CMov.getOperand(3));
14053
+ }
14054
+
14085
14055
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14086
14056
///
14087
14057
static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14068,9 @@ static SDValue PerformSUBCombine(SDNode *N,
14098
14068
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14099
14069
return R;
14100
14070
14071
+ if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
14072
+ return Val;
14073
+
14101
14074
if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14102
14075
return SDValue();
14103
14076
0 commit comments