Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}

setOperationAction(ISD::ABDS, MVT::i32, Custom);
setOperationAction(ISD::ABDS, MVT::i64, Custom);
setOperationAction(ISD::ABDU, MVT::i32, Custom);
setOperationAction(ISD::ABDU, MVT::i64, Custom);

setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
Expand Down Expand Up @@ -7200,6 +7205,40 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}

/// Custom-lower ISD::ABDS / ISD::ABDU (signed / unsigned absolute difference).
///
/// Vector types are forwarded to the predicated ABDS_PRED / ABDU_PRED nodes.
/// Scalar types are lowered to a flag-setting subtract followed by a
/// conditional select between (a - b) and -(a - b); the intent is for this to
/// be selected as SUBS + CNEG.
SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  const bool IsSigned = Op.getOpcode() == ISD::ABDS;

  if (VT.isVector()) {
    if (IsSigned)
      return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
  }

  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  // Compute a - b and set the flags.
  SDValue Cmp =
      DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS);

  // Compute b - a as the negation of a - b (identical in two's complement).
  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                            Cmp.getValue(0));

  // Unsigned: HS (carry set, i.e. no borrow) means a >= b.
  // Signed:   GE (N == V) means a >= b. PL (N clear) must NOT be used here:
  //           unlike LowerABS, which compares against zero, a - b can overflow
  //           (e.g. a = INT_MIN, b = 1), in which case the sign bit of the
  //           wrapped difference is the opposite of the real ordering.
  AArch64CC::CondCode CC = IsSigned ? AArch64CC::GE : AArch64CC::HS;

  // CSEL: if a >= b select a - b, otherwise select b - a.
  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
                     getCondCode(DAG, CC), Cmp.getValue(1));
}

static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
Expand Down Expand Up @@ -7649,9 +7688,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
Expand Down
127 changes: 127 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);

setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABDS, MVT::i32, Custom);
setOperationAction(ISD::ABDU, MVT::i32, Custom);

setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

Expand Down Expand Up @@ -5621,6 +5625,79 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}

// Custom-lower i32 ISD::ABS.
//
// Thumb1-only targets get a sign-mask sequence:
//   asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
// All other targets get a compare against zero followed by a conditional
// move that picks the negated value when the MI condition holds.
SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);

  if (Subtarget->isThumb1Only()) {
    // SignMask is the arithmetic shift of Src by 31; (Src ^ SignMask) -
    // SignMask then negates Src exactly when the mask is non-zero.
    SDValue SignMask = DAG.getNode(ISD::SRA, DL, MVT::i32, Src,
                                   DAG.getConstant(31, DL, MVT::i32));
    SDValue Flipped = DAG.getNode(ISD::XOR, DL, MVT::i32, Src, SignMask);
    return DAG.getNode(ISD::SUB, DL, MVT::i32, Flipped, SignMask);
  }

  // Negated = 0 - Src.
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  SDValue Negated = DAG.getNode(ISD::SUB, DL, MVT::i32, Zero, Src);

  // Flags come from comparing Src with zero.
  SDValue Flags = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Src, Zero);

  // CMOV: Src, or Negated when MI holds.
  SDValue CondMI = DAG.getConstant(ARMCC::MI, DL, MVT::i32);
  return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Src, Negated, CondMI, Flags);
}

// Custom-lower i32 ISD::ABDS / ISD::ABDU (signed / unsigned absolute
// difference).
//
// Thumb1-only targets get mask-based sequences built from a "LHS < RHS" mask
// M (0 or -1) and the difference D = LHS - RHS, returning (D ^ M) - M.
// All other targets get a flag-setting subtract followed by a conditional
// move between D and -D.
//
// The signed cases must not test only the sign of D: LHS - RHS can overflow
// (e.g. LHS = INT_MIN, RHS = 1), in which case the sign bit of the wrapped
// difference is the opposite of the real ordering. Signed "less than" is
// N != V, not just N.
SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  const bool IsSigned = Op.getOpcode() == ISD::ABDS;

  // Thumb1-only custom sequences for i32
  if (Subtarget->isThumb1Only()) {
    if (IsSigned) {
      SDValue D = DAG.getNode(ISD::SUB, DL, MVT::i32, LHS, RHS);

      // Recompute N ^ V in the sign bit without using flags:
      //   V = sign((LHS ^ RHS) & (LHS ^ D))   (signed overflow of LHS - RHS)
      //   N = sign(D)
      // so sign(D ^ ((LHS ^ RHS) & (LHS ^ D))) is set iff LHS <s RHS.
      SDValue OpDiff = DAG.getNode(ISD::XOR, DL, MVT::i32, LHS, RHS);
      SDValue ResDiff = DAG.getNode(ISD::XOR, DL, MVT::i32, LHS, D);
      SDValue Ovf = DAG.getNode(ISD::AND, DL, MVT::i32, OpDiff, ResDiff);
      SDValue Lt = DAG.getNode(ISD::XOR, DL, MVT::i32, D, Ovf);

      // Broadcast the "less than" bit into a 0 / -1 mask and conditionally
      // negate D with it.
      SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
      SDValue S = DAG.getNode(ISD::SRA, DL, MVT::i32, Lt, ShiftAmt);
      SDValue T = DAG.getNode(ISD::XOR, DL, MVT::i32, D, S);
      return DAG.getNode(ISD::SUB, DL, MVT::i32, T, S);
    }

    // abdu: subs; sbcs r1,r1,r1 (mask from borrow); eors; subs
    // First subtraction: LHS - RHS
    SDValue Sub1WithFlags = DAG.getNode(
        ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
    SDValue Sub1Result = Sub1WithFlags.getValue(0);
    SDValue Flags1 = Sub1WithFlags.getValue(1);

    // RHS - RHS - borrow: 0 when LHS >= RHS (unsigned), -1 otherwise.
    SDValue Sbc1 = DAG.getNode(
        ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1);

    SDValue Xor =
        DAG.getNode(ISD::XOR, DL, MVT::i32, Sub1Result, Sbc1.getValue(0));

    return DAG.getNode(ISD::SUB, DL, MVT::i32, Xor, Sbc1.getValue(0));
  }

  // Generate a flag-setting subtract and CMOV for absolute difference.
  // Compute a - b with flags
  SDValue Cmp =
      DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);

  // Compute b - a (negative of a - b)
  SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32,
                            DAG.getConstant(0, DL, MVT::i32), Cmp.getValue(0));

  // For unsigned: use LO (a < b) to select -(a-b), which is the same as b-a in
  // two's complement, otherwise a-b.
  // For signed: use LT (N != V, i.e. a < b even when a-b overflows) to select
  // -(a-b), otherwise a-b.
  ARMCC::CondCodes CC = IsSigned ? ARMCC::LT : ARMCC::LO;

  // CMOV: if a >= b, select a-b, otherwise select the negated difference.
  return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Cmp.getValue(0), Neg,
                     DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
}

SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
Expand Down Expand Up @@ -10670,6 +10747,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
case ISD::ABDU:
return LowerABD(Op, DAG);
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
Expand Down Expand Up @@ -14087,6 +14169,48 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
CSINC.getOperand(3));
}

// Returns true when Op is an ISD::SUB whose first operand is the constant
// zero, i.e. an integer negation written as (sub 0, X).
static bool isNegatedInteger(SDValue Op) {
  if (Op.getOpcode() != ISD::SUB)
    return false;
  return isNullConstant(Op.getOperand(0));
}

// Builds (sub 0, Op) in Op's own value type, i.e. the integer negation of Op.
static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
  SDLoc Loc(Op);
  EVT Ty = Op.getValueType();
  return DAG.getNode(ISD::SUB, Loc, Ty, DAG.getConstant(0, Loc, Ty), Op);
}

// Try to fold
//
//   (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
//
// Pushing the negation into the arms of the cmov is meant to let the cmov be
// matched with csneg without emitting a separate, redundant neg instruction.
static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
  // N itself must be a negation: (sub 0, ...).
  if (!isNegatedInteger(SDValue(N, 0)))
    return SDValue();

  // The negated value must be a CMOV that has no other users.
  SDValue CMov = N->getOperand(1);
  const bool IsSingleUseCMov =
      CMov.getOpcode() == ARMISD::CMOV && CMov->hasOneUse();
  if (!IsSingleUseCMov)
    return SDValue();

  SDValue ArmA = CMov.getOperand(0);
  SDValue ArmB = CMov.getOperand(1);

  // If neither arm is already a negation the fold is not worthwhile: it would
  // add two negations in exchange for removing one.
  if (!(isNegatedInteger(ArmA) || isNegatedInteger(ArmB)))
    return SDValue();

  SDValue NegA = getNegatedInteger(ArmA, DAG);
  SDValue NegB = getNegatedInteger(ArmB, DAG);

  // Rebuild the CMOV over the negated arms, reusing the original condition
  // code and flags operands.
  SDLoc DL(N);
  EVT VT = CMov.getValueType();
  return DAG.getNode(ARMISD::CMOV, DL, VT, NegA, NegB, CMov.getOperand(2),
                     CMov.getOperand(3));
}

/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
Expand All @@ -14103,6 +14227,9 @@ static SDValue PerformSUBCombine(SDNode *N,
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
return R;

if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
return Val;

if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
return SDValue();

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,9 @@ class VectorType;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;

SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/ARM/mve-abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)

define i32 @abs(i32 %arg) {
; MVE-LABEL: 'abs'
; MVE-NEXT: Cost Model: Found costs of RThru:5 CodeSize:6 Lat:5 SizeLat:5 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:74 CodeSize:55 Lat:74 SizeLat:74 for: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:148 CodeSize:110 Lat:148 SizeLat:148 for: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:296 CodeSize:220 Lat:296 SizeLat:296 for: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of 2 for: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:3 SizeLat:3 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of 2 for: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:40 CodeSize:29 Lat:40 SizeLat:40 for: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
Expand Down
Loading
Loading