Skip to content

Commit 733c1ad

Browse files
authored
[ARM] Replace ABS and tABS machine nodes with custom lowering (#156717)
Lower ISD::ABS with a custom lowering instead of the ABS and t2ABS pseudo-instructions. Also copy the cmov-neg fold to prevent regressions in nabs.
1 parent 8afea0d commit 733c1ad

File tree

11 files changed

+119
-189
lines changed

11 files changed

+119
-189
lines changed

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -306,9 +306,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
306306

307307
bool tryInsertVectorElt(SDNode *N);
308308

309-
// Select special operations if node forms integer ABS pattern
310-
bool tryABSOp(SDNode *N);
311-
312309
bool tryReadRegister(SDNode *N);
313310
bool tryWriteRegister(SDNode *N);
314311

@@ -3459,45 +3456,6 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
34593456
return false;
34603457
}
34613458

3462-
/// Target-specific DAG combining for ISD::SUB.
3463-
/// Target-independent combining lowers SELECT_CC nodes of the form
3464-
/// select_cc setg[ge] X, 0, X, -X
3465-
/// select_cc setgt X, -1, X, -X
3466-
/// select_cc setl[te] X, 0, -X, X
3467-
/// select_cc setlt X, 1, -X, X
3468-
/// which represent Integer ABS into:
3469-
/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3470-
/// ARM instruction selection detects the latter and matches it to
3471-
/// ARM::ABS or ARM::t2ABS machine node.
3472-
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3473-
SDValue SUBSrc0 = N->getOperand(0);
3474-
SDValue SUBSrc1 = N->getOperand(1);
3475-
EVT VT = N->getValueType(0);
3476-
3477-
if (Subtarget->isThumb1Only())
3478-
return false;
3479-
3480-
if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3481-
return false;
3482-
3483-
SDValue XORSrc0 = SUBSrc0.getOperand(0);
3484-
SDValue XORSrc1 = SUBSrc0.getOperand(1);
3485-
SDValue SRASrc0 = SUBSrc1.getOperand(0);
3486-
SDValue SRASrc1 = SUBSrc1.getOperand(1);
3487-
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
3488-
EVT XType = SRASrc0.getValueType();
3489-
unsigned Size = XType.getSizeInBits() - 1;
3490-
3491-
if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3492-
SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3493-
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494-
CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3495-
return true;
3496-
}
3497-
3498-
return false;
3499-
}
3500-
35013459
/// We've got special pseudo-instructions for these
35023460
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
35033461
unsigned Opcode;
@@ -3685,12 +3643,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
36853643
if (tryInlineAsm(N))
36863644
return;
36873645
break;
3688-
case ISD::SUB:
3689-
// Select special operations if SUB node forms integer ABS pattern
3690-
if (tryABSOp(N))
3691-
return;
3692-
// Other cases are autogenerated.
3693-
break;
36943646
case ISD::Constant: {
36953647
unsigned Val = N->getAsZExtVal();
36963648
// If we can't materialize the constant we need to use a literal pool

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 56 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
647647
if (!Subtarget->hasV8_1MMainlineOps())
648648
setOperationAction(ISD::UCMP, MVT::i32, Custom);
649649

650+
if (!Subtarget->isThumb1Only())
651+
setOperationAction(ISD::ABS, MVT::i32, Custom);
652+
650653
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
651654
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
652655

@@ -5621,6 +5624,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
56215624
return SDValue();
56225625
}
56235626

5627+
// Generate CMP + CMOV for integer abs.
//
// Builds the i32 negation of operand 0, an ARMISD::CMP of operand 0 against
// zero, and returns an ARMISD::CMOV over (operand 0, negation) that uses the
// MI condition code together with the flags produced by that compare.
// NOTE(review): presumably the CMOV yields the negation when MI holds (the
// input is negative) and the original value otherwise -- confirm against the
// ARMISD::CMOV operand order.
5628+
SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
5629+
SDLoc DL(Op);
5630+
5631+
// Negation of the input; one of the two values the CMOV chooses between.
SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);
5632+
5633+
// Generate CMP & CMOV.
5634+
SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
5635+
DAG.getConstant(0, DL, MVT::i32));
5636+
return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
5637+
DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
5638+
}
5639+
56245640
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
56255641
SDValue Chain = Op.getOperand(0);
56265642
SDValue Cond = Op.getOperand(1);
@@ -10703,6 +10719,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1070310719
case ISD::UCMP:
1070410720
case ISD::SCMP:
1070510721
return LowerCMP(Op, DAG);
10722+
case ISD::ABS:
10723+
return LowerABS(Op, DAG);
1070610724
}
1070710725
}
1070810726

@@ -12288,89 +12306,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1228812306
case ARM::Int_eh_sjlj_setup_dispatch:
1228912307
EmitSjLjDispatchBlock(MI, BB);
1229012308
return BB;
12291-
12292-
case ARM::ABS:
12293-
case ARM::t2ABS: {
12294-
// To insert an ABS instruction, we have to insert the
12295-
// diamond control-flow pattern. The incoming instruction knows the
12296-
// source vreg to test against 0, the destination vreg to set,
12297-
// the condition code register to branch on, the
12298-
// true/false values to select between, and a branch opcode to use.
12299-
// It transforms
12300-
// V1 = ABS V0
12301-
// into
12302-
// V2 = MOVS V0
12303-
// BCC (branch to SinkBB if V0 >= 0)
12304-
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12305-
// SinkBB: V1 = PHI(V2, V3)
12306-
const BasicBlock *LLVM_BB = BB->getBasicBlock();
12307-
MachineFunction::iterator BBI = ++BB->getIterator();
12308-
MachineFunction *Fn = BB->getParent();
12309-
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12310-
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12311-
Fn->insert(BBI, RSBBB);
12312-
Fn->insert(BBI, SinkBB);
12313-
12314-
// Set the call frame size on entry to the new basic blocks.
12315-
unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12316-
RSBBB->setCallFrameSize(CallFrameSize);
12317-
SinkBB->setCallFrameSize(CallFrameSize);
12318-
12319-
Register ABSSrcReg = MI.getOperand(1).getReg();
12320-
Register ABSDstReg = MI.getOperand(0).getReg();
12321-
bool ABSSrcKIll = MI.getOperand(1).isKill();
12322-
bool isThumb2 = Subtarget->isThumb2();
12323-
MachineRegisterInfo &MRI = Fn->getRegInfo();
12324-
// In Thumb mode S must not be specified if source register is the SP or
12325-
// PC and if destination register is the SP, so restrict register class
12326-
Register NewRsbDstReg = MRI.createVirtualRegister(
12327-
isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12328-
12329-
// Transfer the remainder of BB and its successor edges to sinkMBB.
12330-
SinkBB->splice(SinkBB->begin(), BB,
12331-
std::next(MachineBasicBlock::iterator(MI)), BB->end());
12332-
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
12333-
12334-
BB->addSuccessor(RSBBB);
12335-
BB->addSuccessor(SinkBB);
12336-
12337-
// fall through to SinkMBB
12338-
RSBBB->addSuccessor(SinkBB);
12339-
12340-
// insert a cmp at the end of BB
12341-
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12342-
.addReg(ABSSrcReg)
12343-
.addImm(0)
12344-
.add(predOps(ARMCC::AL));
12345-
12346-
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
12347-
BuildMI(BB, dl,
12348-
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12349-
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
12350-
12351-
// insert rsbri in RSBBB
12352-
// Note: BCC and rsbri will be converted into predicated rsbmi
12353-
// by if-conversion pass
12354-
BuildMI(*RSBBB, RSBBB->begin(), dl,
12355-
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12356-
.addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12357-
.addImm(0)
12358-
.add(predOps(ARMCC::AL))
12359-
.add(condCodeOp());
12360-
12361-
// insert PHI in SinkBB,
12362-
// reuse ABSDstReg to not change uses of ABS instruction
12363-
BuildMI(*SinkBB, SinkBB->begin(), dl,
12364-
TII->get(ARM::PHI), ABSDstReg)
12365-
.addReg(NewRsbDstReg).addMBB(RSBBB)
12366-
.addReg(ABSSrcReg).addMBB(BB);
12367-
12368-
// remove ABS instruction
12369-
MI.eraseFromParent();
12370-
12371-
// return last added BB
12372-
return SinkBB;
12373-
}
1237412309
case ARM::COPY_STRUCT_BYVAL_I32:
1237512310
++NumLoopByVals;
1237612311
return EmitStructByval(MI, BB);
@@ -14082,6 +14017,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
1408214017
CSINC.getOperand(3));
1408314018
}
1408414019

14020+
// Returns true if Op is an ISD::SUB whose first operand is the null (zero)
// constant, i.e. a node of the form (sub 0, X).
static bool isNegatedInteger(SDValue Op) {
14021+
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
14022+
}
14023+
14024+
// Try to fold
14025+
//
14026+
// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
14027+
//
14028+
// The folding helps cmov to be matched with csneg without generating
14029+
// redundant neg instruction.
//
// N is the candidate negation node. Returns the replacement CMOV node, or an
// empty SDValue when the fold does not apply.
14030+
static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
14031+
// Only nodes of the form (sub 0, X) are candidates.
if (!isNegatedInteger(SDValue(N, 0)))
14032+
return SDValue();
14033+
14034+
// The value being negated must be a CMOV with exactly one use.
SDValue CMov = N->getOperand(1);
14035+
if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
14036+
return SDValue();
14037+
14038+
SDValue N0 = CMov.getOperand(0);
14039+
SDValue N1 = CMov.getOperand(1);
14040+
14041+
// If neither of them is a negation, the fold is not worthwhile because it
14042+
// introduces two additional negations while removing only one.
14043+
if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
14044+
return SDValue();
14045+
14046+
SDLoc DL(N);
14047+
EVT VT = CMov.getValueType();
14048+
14049+
// Negate both CMOV value operands.
SDValue N0N = DAG.getNegative(N0, DL, VT);
14050+
SDValue N1N = DAG.getNegative(N1, DL, VT);
14051+
// Rebuild the CMOV over the negated values, reusing operands 2 and 3 of the
// original CMOV unchanged.
return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
14052+
CMov.getOperand(3));
14053+
}
14054+
1408514055
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
1408614056
///
1408714057
static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14068,9 @@ static SDValue PerformSUBCombine(SDNode *N,
1409814068
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
1409914069
return R;
1410014070

14071+
if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
14072+
return Val;
14073+
1410114074
if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
1410214075
return SDValue();
1410314076

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,7 @@ class VectorType;
908908
SelectionDAG &DAG) const;
909909
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
910910
SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
911+
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
911912

912913
Register getRegisterByName(const char* RegName, LLT VT,
913914
const MachineFunction &MF) const override;

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5185,12 +5185,6 @@ def SB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "sb", "", []>,
51855185
let hasSideEffects = 1;
51865186
}
51875187

5188-
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
5189-
// Pseudo instruction that combines movs + predicated rsbmi
5190-
// to implement integer ABS
5191-
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
5192-
}
5193-
51945188
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
51955189
def COPY_STRUCT_BYVAL_I32 : PseudoInst<
51965190
(outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment),

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4393,13 +4393,6 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
43934393
imm:$cp))]>,
43944394
Requires<[IsThumb2]>;
43954395

4396-
// Pseudo instruction that combines movs + predicated rsbmi
4397-
// to implement integer ABS
4398-
let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in {
4399-
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
4400-
NoItinerary, []>, Requires<[IsThumb2]>;
4401-
}
4402-
44034396
//===----------------------------------------------------------------------===//
44044397
// Coprocessor load/store -- for disassembly only
44054398
//

llvm/lib/Target/ARM/ARMScheduleA57.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,8 @@ def : InstRW<[WriteNoop, WriteNoop], (instregex
137137
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
138138

139139
// Pseudos
140-
def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
141-
"(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
142-
"tLDRpci_pic", "(t2)?SUBS_PC_LR",
140+
def : InstRW<[WriteNoop], (instregex "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$",
141+
"(t2|t)?Int_eh_sjlj", "tLDRpci_pic", "(t2)?SUBS_PC_LR",
143142
"JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
144143
"VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
145144
"VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",

llvm/lib/Target/ARM/ARMScheduleR52.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
235235
"UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
236236
"t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
237237
"t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
238-
"t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
238+
"t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>;
239239

240240
// Parallel arithmetic
241241
def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],

llvm/lib/Target/ARM/ARMScheduleSwift.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,9 +1081,6 @@ let SchedModel = SwiftModel in {
10811081
def : WriteRes<WriteVST2, []>;
10821082
def : WriteRes<WriteVST3, []>;
10831083
def : WriteRes<WriteVST4, []>;
1084-
1085-
// Not specified.
1086-
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
10871084
// Preload.
10881085
def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0;
10891086
let ReleaseAtCycles = [0];

llvm/test/Analysis/CostModel/ARM/abs.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,27 @@ define void @abs() {
1616
; CHECK-T1-NEXT: Cost Model: Found costs of 1 for: ret void
1717
;
1818
; CHECK-T2-LABEL: 'abs'
19-
; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
20-
; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
21-
; CHECK-T2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
22-
; CHECK-T2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
23-
; CHECK-T2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:10 Lat:9 SizeLat:9 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
19+
; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
20+
; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
21+
; CHECK-T2-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
22+
; CHECK-T2-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
23+
; CHECK-T2-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
2424
; CHECK-T2-NEXT: Cost Model: Found costs of 1 for: ret void
2525
;
2626
; CHECK-81-LABEL: 'abs'
27-
; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
28-
; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
29-
; CHECK-81-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
30-
; CHECK-81-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
31-
; CHECK-81-NEXT: Cost Model: Found costs of RThru:9 CodeSize:10 Lat:9 SizeLat:9 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
27+
; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
28+
; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
29+
; CHECK-81-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
30+
; CHECK-81-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
31+
; CHECK-81-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
3232
; CHECK-81-NEXT: Cost Model: Found costs of 1 for: ret void
3333
;
3434
; CHECK-ARM-LABEL: 'abs'
35-
; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
36-
; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
37-
; CHECK-ARM-NEXT: Cost Model: Found costs of 3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
38-
; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
39-
; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:9 CodeSize:6 Lat:6 SizeLat:6 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
35+
; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
36+
; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
37+
; CHECK-ARM-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
38+
; CHECK-ARM-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
39+
; CHECK-ARM-NEXT: Cost Model: Found costs of 8 for: %I128 = call i128 @llvm.abs.i128(i128 undef, i1 false)
4040
; CHECK-ARM-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4141
;
4242
%I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)

llvm/test/Analysis/CostModel/ARM/mve-abs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
3131

3232
define i32 @abs(i32 %arg) {
3333
; MVE-LABEL: 'abs'
34-
; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
34+
; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
3535
; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
3636
; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
3737
; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
38-
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
38+
; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
3939
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
4040
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
4141
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
4242
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
43-
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
43+
; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
4444
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
4545
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
4646
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
4747
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
4848
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
49-
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
49+
; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
5050
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
5151
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
5252
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)

0 commit comments

Comments
 (0)