diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..847b7af5a9b11 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -306,9 +306,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
 
   bool tryInsertVectorElt(SDNode *N);
 
-  // Select special operations if node forms integer ABS pattern
-  bool tryABSOp(SDNode *N);
-
   bool tryReadRegister(SDNode *N);
   bool tryWriteRegister(SDNode *N);
 
@@ -3459,45 +3456,6 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
   return false;
 }
 
-/// Target-specific DAG combining for ISD::SUB.
-/// Target-independent combining lowers SELECT_CC nodes of the form
-///   select_cc setg[ge] X,  0,  X, -X
-///   select_cc setgt    X, -1,  X, -X
-///   select_cc setl[te] X,  0, -X,  X
-///   select_cc setlt    X,  1, -X,  X
-/// which represent Integer ABS into:
-///   Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
-/// ARM instruction selection detects the latter and matches it to
-/// ARM::ABS or ARM::t2ABS machine node.
-bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
-  SDValue SUBSrc0 = N->getOperand(0);
-  SDValue SUBSrc1 = N->getOperand(1);
-  EVT VT = N->getValueType(0);
-
-  if (Subtarget->isThumb1Only())
-    return false;
-
-  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
-    return false;
-
-  SDValue XORSrc0 = SUBSrc0.getOperand(0);
-  SDValue XORSrc1 = SUBSrc0.getOperand(1);
-  SDValue SRASrc0 = SUBSrc1.getOperand(0);
-  SDValue SRASrc1 = SUBSrc1.getOperand(1);
-  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
-  EVT XType = SRASrc0.getValueType();
-  unsigned Size = XType.getSizeInBits() - 1;
-
-  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
-      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
-    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
-    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
-    return true;
-  }
-
-  return false;
-}
-
 /// We've got special pseudo-instructions for these
 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   unsigned Opcode;
@@ -3685,12 +3643,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     if (tryInlineAsm(N))
      return;
    break;
-  case ISD::SUB:
-    // Select special operations if SUB node forms integer ABS pattern
-    if (tryABSOp(N))
-      return;
-    // Other cases are autogenerated.
-    break;
   case ISD::Constant: {
     unsigned Val = N->getAsZExtVal();
     // If we can't materialize the constant we need to use a literal pool
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4af2721562d7c..9052cbfa89deb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
   if (!Subtarget->hasV8_1MMainlineOps())
     setOperationAction(ISD::UCMP, MVT::i32, Custom);
 
+  if (!Subtarget->isThumb1Only())
+    setOperationAction(ISD::ABS, MVT::i32, Custom);
+
   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 
@@ -5621,6 +5624,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// Generate CMP + CMOV for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+
+  SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);
+
+  // Generate CMP & CMOV.
+  SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
+                            DAG.getConstant(0, DL, MVT::i32));
+  return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
+                     DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
 SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Op.getOperand(0);
   SDValue Cond = Op.getOperand(1);
@@ -10703,6 +10719,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::UCMP:
   case ISD::SCMP:
     return LowerCMP(Op, DAG);
+  case ISD::ABS:
+    return LowerABS(Op, DAG);
   }
 }
 
@@ -12288,89 +12306,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case ARM::Int_eh_sjlj_setup_dispatch:
     EmitSjLjDispatchBlock(MI, BB);
     return BB;
-
-  case ARM::ABS:
-  case ARM::t2ABS: {
-    // To insert an ABS instruction, we have to insert the
-    // diamond control-flow pattern.  The incoming instruction knows the
-    // source vreg to test against 0, the destination vreg to set,
-    // the condition code register to branch on, the
-    // true/false values to select between, and a branch opcode to use.
-    // It transforms
-    //     V1 = ABS V0
-    // into
-    //     V2 = MOVS V0
-    //     BCC                      (branch to SinkBB if V0 >= 0)
-    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
-    //     SinkBB: V1 = PHI(V2, V3)
-    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-    MachineFunction::iterator BBI = ++BB->getIterator();
-    MachineFunction *Fn = BB->getParent();
-    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
-    Fn->insert(BBI, RSBBB);
-    Fn->insert(BBI, SinkBB);
-
-    // Set the call frame size on entry to the new basic blocks.
-    unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
-    RSBBB->setCallFrameSize(CallFrameSize);
-    SinkBB->setCallFrameSize(CallFrameSize);
-
-    Register ABSSrcReg = MI.getOperand(1).getReg();
-    Register ABSDstReg = MI.getOperand(0).getReg();
-    bool ABSSrcKIll = MI.getOperand(1).isKill();
-    bool isThumb2 = Subtarget->isThumb2();
-    MachineRegisterInfo &MRI = Fn->getRegInfo();
-    // In Thumb mode S must not be specified if source register is the SP or
-    // PC and if destination register is the SP, so restrict register class
-    Register NewRsbDstReg = MRI.createVirtualRegister(
-        isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
-
-    // Transfer the remainder of BB and its successor edges to sinkMBB.
-    SinkBB->splice(SinkBB->begin(), BB,
-                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
-    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    BB->addSuccessor(RSBBB);
-    BB->addSuccessor(SinkBB);
-
-    // fall through to SinkMBB
-    RSBBB->addSuccessor(SinkBB);
-
-    // insert a cmp at the end of BB
-    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
-        .addReg(ABSSrcReg)
-        .addImm(0)
-        .add(predOps(ARMCC::AL));
-
-    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
-    BuildMI(BB, dl,
-            TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
-        .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
-
-    // insert rsbri in RSBBB
-    // Note: BCC and rsbri will be converted into predicated rsbmi
-    // by if-conversion pass
-    BuildMI(*RSBBB, RSBBB->begin(), dl,
-            TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
-        .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
-        .addImm(0)
-        .add(predOps(ARMCC::AL))
-        .add(condCodeOp());
-
-    // insert PHI in SinkBB,
-    // reuse ABSDstReg to not change uses of ABS instruction
-    BuildMI(*SinkBB, SinkBB->begin(), dl,
-            TII->get(ARM::PHI), ABSDstReg)
-        .addReg(NewRsbDstReg).addMBB(RSBBB)
-        .addReg(ABSSrcReg).addMBB(BB);
-
-    // remove ABS instruction
-    MI.eraseFromParent();
-
-    // return last added BB
-    return SinkBB;
-  }
   case ARM::COPY_STRUCT_BYVAL_I32:
     ++NumLoopByVals;
     return EmitStructByval(MI, BB);
@@ -14082,6 +14017,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
                      CSINC.getOperand(3));
 }
 
+static bool isNegatedInteger(SDValue Op) {
+  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+// Try to fold
+//
+// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
+//
+// The folding helps cmov to be matched with csneg without generating
+// redundant neg instruction.
+static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
+  if (!isNegatedInteger(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue CMov = N->getOperand(1);
+  if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
+    return SDValue();
+
+  SDValue N0 = CMov.getOperand(0);
+  SDValue N1 = CMov.getOperand(1);
+
+  // If neither of them are negations, it's not worth the folding as it
+  // introduces two additional negations while reducing one negation.
+  if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = CMov.getValueType();
+
+  SDValue N0N = DAG.getNegative(N0, DL, VT);
+  SDValue N1N = DAG.getNegative(N1, DL, VT);
+  return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
+                     CMov.getOperand(3));
+}
+
 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
 ///
 static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14068,9 @@ static SDValue PerformSUBCombine(SDNode *N,
   if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
     return R;
 
+  if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
+    return Val;
+
   if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
     return SDValue();
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index ccf6d509313b9..8e417ac3e1a7b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -908,6 +908,7 @@ class VectorType;
                             SelectionDAG &DAG) const;
     SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
 
     Register getRegisterByName(const char* RegName, LLT VT,
                                const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index bdb16d7d39266..282ff534fc112 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -5185,12 +5185,6 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
   let hasSideEffects = 1;
 }
 
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
-  // Pseudo instruction that combines movs + predicated rsbmi
-  // to implement integer ABS
-  def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-}
-
 let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
   def COPY_STRUCT_BYVAL_I32 : PseudoInst<
     (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c00d616670b5a..c229c8e4491df 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -4393,13 +4393,6 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
                                            imm:$cp))]>,
                    Requires<[IsThumb2]>;
 
-// Pseudo instruction that combines movs + predicated rsbmi
-// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
-def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
-                       NoItinerary, []>, Requires<[IsThumb2]>;
-}
-
 //===----------------------------------------------------------------------===//
 // Coprocessor load/store -- for disassembly only
 //
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 3baac6b233c45..2dad18368947a 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -137,9 +137,8 @@ def : InstRW<[WriteNoop, WriteNoop], (instregex
 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
 
 // Pseudos
-def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
-    "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
-    "tLDRpci_pic", "(t2)?SUBS_PC_LR",
+def : InstRW<[WriteNoop], (instregex "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$",
+    "(t2|t)?Int_eh_sjlj", "tLDRpci_pic", "(t2)?SUBS_PC_LR",
     "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
     "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
     "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
diff --git a/llvm/lib/Target/ARM/ARMScheduleR52.td b/llvm/lib/Target/ARM/ARMScheduleR52.td
index e85646915117c..c350180baa250 100644
--- a/llvm/lib/Target/ARM/ARMScheduleR52.td
+++ b/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -235,7 +235,7 @@ def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
-      "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
+      "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
 
 // Parallel arithmetic
 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
diff --git a/llvm/lib/Target/ARM/ARMScheduleSwift.td b/llvm/lib/Target/ARM/ARMScheduleSwift.td
index 88682f5c0d2c4..8d53119f4747b 100644
--- a/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -1081,9 +1081,6 @@ let SchedModel = SwiftModel in {
   def : WriteRes;
   def : WriteRes;
   def : WriteRes;
-
-  // Not specified.
-  def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
   // Preload.
   def : WriteRes { let Latency = 0;
                    let ReleaseAtCycles = [0];
diff --git a/llvm/test/Analysis/CostModel/ARM/abs.ll b/llvm/test/Analysis/CostModel/ARM/abs.ll
index 8c7fef3405127..e23af083e4758 100644
--- a/llvm/test/Analysis/CostModel/ARM/abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/abs.ll
@@ -16,27 +16,27 @@ define void @abs() {
 ; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-T2-LABEL: 'abs'
-; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; CHECK-T2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; CHECK-T2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:10 Lat:9 SizeLat:9 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
+; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; CHECK-T2-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; CHECK-T2-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
 ; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-81-LABEL: 'abs'
-; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; CHECK-81-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; CHECK-81-NEXT: Cost Model: Found costs of RThru:9 CodeSize:10 Lat:9 SizeLat:9 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
+; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; CHECK-81-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; CHECK-81-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
 ; CHECK-81-NEXT: Cost Model: Found costs of 1 for: ret void
 ;
 ; CHECK-ARM-LABEL: 'abs'
-; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:6 SizeLat:6 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
+; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; CHECK-ARM-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; CHECK-ARM-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
 ; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index 254c191569f8b..42563e8e34a94 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
 
 define i32 @abs(i32 %arg) {
 ; MVE-LABEL: 'abs'
-; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
-; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
 ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll
index 88259ba758803..3cc3ec93e2404 100644
--- a/llvm/test/CodeGen/Thumb2/abs.ll
+++ b/llvm/test/CodeGen/Thumb2/abs.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefix=CHECKT1
-; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
-; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2
+; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V7
+; RUN: llc -verify-machineinstrs -mtriple=thumbv8.1m.main-none-eabi < %s | FileCheck %s --check-prefixes=CHECKT2,CHECKT2V8
 
 declare i64 @llvm.abs.i64(i64, i1 immarg)
 
@@ -39,11 +39,18 @@ define i32 @neg_abs32(i32 %x) {
 ; CHECKT1-NEXT:    subs r0, r1, r0
 ; CHECKT1-NEXT:    bx lr
 ;
-; CHECKT2-LABEL: neg_abs32:
-; CHECKT2:       @ %bb.0:
-; CHECKT2-NEXT:    eor.w r1, r0, r0, asr #31
-; CHECKT2-NEXT:    rsb r0, r1, r0, asr #31
-; CHECKT2-NEXT:    bx lr
+; CHECKT2V7-LABEL: neg_abs32:
+; CHECKT2V7:       @ %bb.0:
+; CHECKT2V7-NEXT:    cmp r0, #0
+; CHECKT2V7-NEXT:    it pl
+; CHECKT2V7-NEXT:    rsbpl r0, r0, #0
+; CHECKT2V7-NEXT:    bx lr
+;
+; CHECKT2V8-LABEL: neg_abs32:
+; CHECKT2V8:       @ %bb.0:
+; CHECKT2V8-NEXT:    cmp r0, #0
+; CHECKT2V8-NEXT:    cneg r0, r0, pl
+; CHECKT2V8-NEXT:    bx lr
   %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
   %neg = sub nsw i32 0, %abs
   ret i32 %neg
@@ -145,12 +152,18 @@ define i32 @abs32(i32 %x) {
 ; CHECKT1-NEXT:    subs r0, r0, r1
 ; CHECKT1-NEXT:    bx lr
 ;
-; CHECKT2-LABEL: abs32:
-; CHECKT2:       @ %bb.0:
-; CHECKT2-NEXT:    cmp r0, #0
-; CHECKT2-NEXT:    it mi
-; CHECKT2-NEXT:    rsbmi r0, r0, #0
-; CHECKT2-NEXT:    bx lr
+; CHECKT2V7-LABEL: abs32:
+; CHECKT2V7:       @ %bb.0:
+; CHECKT2V7-NEXT:    cmp r0, #0
+; CHECKT2V7-NEXT:    it mi
+; CHECKT2V7-NEXT:    rsbmi r0, r0, #0
+; CHECKT2V7-NEXT:    bx lr
+;
+; CHECKT2V8-LABEL: abs32:
+; CHECKT2V8:       @ %bb.0:
+; CHECKT2V8-NEXT:    cmp r0, #0
+; CHECKT2V8-NEXT:    cneg r0, r0, mi
+; CHECKT2V8-NEXT:    bx lr
   %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
   ret i32 %abs
 }
@@ -164,12 +177,20 @@ define i16 @abs16(i16 %x) {
 ; CHECKT1-NEXT:    subs r0, r0, r1
 ; CHECKT1-NEXT:    bx lr
 ;
-; CHECKT2-LABEL: abs16:
-; CHECKT2:       @ %bb.0:
-; CHECKT2-NEXT:    sxth r1, r0
-; CHECKT2-NEXT:    eor.w r0, r0, r1, asr #15
-; CHECKT2-NEXT:    sub.w r0, r0, r1, asr #15
-; CHECKT2-NEXT:    bx lr
+; CHECKT2V7-LABEL: abs16:
+; CHECKT2V7:       @ %bb.0:
+; CHECKT2V7-NEXT:    sxth r0, r0
+; CHECKT2V7-NEXT:    cmp r0, #0
+; CHECKT2V7-NEXT:    it mi
+; CHECKT2V7-NEXT:    rsbmi r0, r0, #0
+; CHECKT2V7-NEXT:    bx lr
+;
+; CHECKT2V8-LABEL: abs16:
+; CHECKT2V8:       @ %bb.0:
+; CHECKT2V8-NEXT:    sxth r0, r0
+; CHECKT2V8-NEXT:    cmp r0, #0
+; CHECKT2V8-NEXT:    cneg r0, r0, mi
+; CHECKT2V8-NEXT:    bx lr
   %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
   ret i16 %abs
 }
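
A minimal worked example of the new lowering path, assembled from the abs32 and neg_abs32 expectations in the updated llvm/test/CodeGen/Thumb2/abs.ll above. The DAG lines are an informal sketch of the nodes LowerABS builds, listed in operand order, not actual SelectionDAG dump syntax:

    ; IR input
    define i32 @abs32(i32 %x) {
      %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
      ret i32 %abs
    }

    ; LowerABS builds:
    ;   Neg = sub 0, %x
    ;   Cmp = ARMISD::CMP %x, 0
    ;   Res = ARMISD::CMOV %x, Neg, ARMCC::MI, Cmp
    ;
    ; thumbv7m-none-eabi:
    ;   cmp   r0, #0
    ;   it    mi
    ;   rsbmi r0, r0, #0
    ;   bx    lr
    ;
    ; thumbv8.1m.main-none-eabi (CMOV matched to cneg):
    ;   cmp  r0, #0
    ;   cneg r0, r0, mi
    ;   bx   lr

For neg_abs32, performNegCMovCombine folds the outer negation into the CMOV operands, so the v8.1-M Mainline output stays a single cneg (selected with the pl condition in the test above) rather than a cneg followed by a separate negate.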