Skip to content

Commit 367e689

Browse files
committed
Custom Lower abds and abdu
1 parent 086ff8c commit 367e689

File tree

8 files changed

+157
-98
lines changed

8 files changed

+157
-98
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
698698
setOperationAction(ISD::ABS, MVT::i64, Custom);
699699
}
700700

701+
setOperationAction(ISD::ABDS, MVT::i32, Custom);
702+
setOperationAction(ISD::ABDS, MVT::i64, Custom);
703+
setOperationAction(ISD::ABDU, MVT::i32, Custom);
704+
setOperationAction(ISD::ABDU, MVT::i64, Custom);
705+
701706
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
702707
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
703708
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7433,6 +7438,80 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
74337438
getCondCode(DAG, AArch64CC::PL), Cmp);
74347439
}
74357440

7441+
// Lower ABDS/ABDU: vector types go to the predicated ABDS_PRED/ABDU_PRED nodes; otherwise emit a flag-setting SUBS (or ADDS) plus a CSEL between the difference and its negation.
7442+
SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
7443+
MVT VT = Op.getSimpleValueType();
7444+
7445+
bool IsSigned = Op.getOpcode() == ISD::ABDS;
7446+
if (VT.isVector()) {
7447+
if (IsSigned)
7448+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
7449+
else
7450+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
7451+
}
7452+
7453+
SDValue LHS = Op.getOperand(0);
7454+
SDValue RHS = Op.getOperand(1);
7455+
SDLoc DL(Op);
7456+
7457+
// If the subtract (in either operand order) doesn't overflow then just use abs(sub())
7458+
bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
7459+
7460+
if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
7461+
return DAG.getNode(ISD::ABS, DL, VT,
7462+
DAG.getNode(ISD::SUB, DL, VT, LHS, RHS));
7463+
7464+
if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
7465+
return DAG.getNode(ISD::ABS, DL, VT,
7466+
DAG.getNode(ISD::SUB, DL, VT, RHS, LHS));
7467+
7468+
unsigned Opcode = AArch64ISD::SUBS;
7469+
// Check if RHS is a negation (0 - X), so LHS - RHS may be emitted as LHS + X
7470+
if (RHS.getOpcode() == ISD::SUB) {
7471+
SDValue SubLHS = RHS.getOperand(0);
7472+
SDValue SubRHS = RHS.getOperand(1);
7473+
7474+
// Check if it's 0 - X
7475+
if (isNullConstant(SubLHS)) {
7476+
bool CanUseAdd = false;
7477+
if (IsSigned) {
7478+
// For signed (ABDS): only if the negation is nsw or X is known to never be INT_MIN (to avoid overflow)
7479+
if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
7480+
.getSignedMinValue()
7481+
.isMinSignedValue()) {
7482+
CanUseAdd = true;
7483+
}
7484+
} else {
7485+
// For unsigned (ABDU): only if X is known to never be zero
7486+
if (DAG.isKnownNeverZero(SubRHS)) {
7487+
CanUseAdd = true;
7488+
}
7489+
}
7490+
7491+
if (CanUseAdd) {
7492+
Opcode = AArch64ISD::ADDS;
7493+
RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
7494+
// LHS - (0 - X)
7495+
}
7496+
}
7497+
}
7498+
7499+
// Generate SUBS (or ADDS, see above) and CSEL for absolute difference (like LowerABS)
7500+
// Compute a - b with flags
7501+
SDValue Cmp = DAG.getNode(Opcode, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS);
7502+
7503+
// Compute b - a (negative of a - b)
7504+
SDValue Neg = DAG.getNegative(Cmp.getValue(0), DL, VT);
7505+
7506+
// For unsigned: use HS (a >= b) to select a-b, otherwise b-a
7507+
// For signed: use GE (a >= b) to select a-b, otherwise b-a
7508+
AArch64CC::CondCode CC = IsSigned ? AArch64CC::GE : AArch64CC::HS;
7509+
7510+
// CSEL: if a >= b, select a-b, otherwise b-a
7511+
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
7512+
getCondCode(DAG, CC), Cmp.getValue(1));
7513+
}
7514+
74367515
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
74377516
SDValue Chain = Op.getOperand(0);
74387517
SDValue Cond = Op.getOperand(1);
@@ -7885,9 +7964,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
78857964
case ISD::ABS:
78867965
return LowerABS(Op, DAG);
78877966
case ISD::ABDS:
7888-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
78897967
case ISD::ABDU:
7890-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
7968+
return LowerABD(Op, DAG);
78917969
case ISD::AVGFLOORS:
78927970
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
78937971
case ISD::AVGFLOORU:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ class AArch64TargetLowering : public TargetLowering {
604604
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
605605
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
606606
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
607+
SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
607608

608609
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
609610
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AArch64/abds-neg.ll

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
7474
; CHECK: // %bb.0:
7575
; CHECK-NEXT: sxth w8, w0
7676
; CHECK-NEXT: subs w8, w8, w1
77-
; CHECK-NEXT: cneg w0, w8, gt
77+
; CHECK-NEXT: cneg w0, w8, ge
7878
; CHECK-NEXT: ret
7979
%aext = sext i16 %a to i64
8080
%bext = sext i32 %b to i64
@@ -105,7 +105,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
105105
; CHECK-LABEL: abd_ext_i32:
106106
; CHECK: // %bb.0:
107107
; CHECK-NEXT: subs w8, w0, w1
108-
; CHECK-NEXT: cneg w0, w8, gt
108+
; CHECK-NEXT: cneg w0, w8, ge
109109
; CHECK-NEXT: ret
110110
%aext = sext i32 %a to i64
111111
%bext = sext i32 %b to i64
@@ -120,7 +120,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
120120
; CHECK-LABEL: abd_ext_i32_i16:
121121
; CHECK: // %bb.0:
122122
; CHECK-NEXT: subs w8, w0, w1, sxth
123-
; CHECK-NEXT: cneg w0, w8, gt
123+
; CHECK-NEXT: cneg w0, w8, ge
124124
; CHECK-NEXT: ret
125125
%aext = sext i32 %a to i64
126126
%bext = sext i16 %b to i64
@@ -135,7 +135,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
135135
; CHECK-LABEL: abd_ext_i32_undef:
136136
; CHECK: // %bb.0:
137137
; CHECK-NEXT: subs w8, w0, w1
138-
; CHECK-NEXT: cneg w0, w8, gt
138+
; CHECK-NEXT: cneg w0, w8, ge
139139
; CHECK-NEXT: ret
140140
%aext = sext i32 %a to i64
141141
%bext = sext i32 %b to i64
@@ -150,7 +150,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
150150
; CHECK-LABEL: abd_ext_i64:
151151
; CHECK: // %bb.0:
152152
; CHECK-NEXT: subs x8, x0, x1
153-
; CHECK-NEXT: cneg x0, x8, gt
153+
; CHECK-NEXT: cneg x0, x8, ge
154154
; CHECK-NEXT: ret
155155
%aext = sext i64 %a to i128
156156
%bext = sext i64 %b to i128
@@ -165,7 +165,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
165165
; CHECK-LABEL: abd_ext_i64_undef:
166166
; CHECK: // %bb.0:
167167
; CHECK-NEXT: subs x8, x0, x1
168-
; CHECK-NEXT: cneg x0, x8, gt
168+
; CHECK-NEXT: cneg x0, x8, ge
169169
; CHECK-NEXT: ret
170170
%aext = sext i64 %a to i128
171171
%bext = sext i64 %b to i128
@@ -225,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
225225
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
226226
; CHECK-LABEL: abd_minmax_i8:
227227
; CHECK: // %bb.0:
228-
; CHECK-NEXT: sxtb w8, w1
229-
; CHECK-NEXT: sxtb w9, w0
230-
; CHECK-NEXT: cmp w9, w8
231-
; CHECK-NEXT: csel w10, w9, w8, lt
232-
; CHECK-NEXT: csel w8, w9, w8, gt
233-
; CHECK-NEXT: sub w0, w10, w8
228+
; CHECK-NEXT: sxtb w8, w0
229+
; CHECK-NEXT: subs w8, w8, w1, sxtb
230+
; CHECK-NEXT: cneg w0, w8, pl
234231
; CHECK-NEXT: ret
235232
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
236233
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -241,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
241238
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
242239
; CHECK-LABEL: abd_minmax_i16:
243240
; CHECK: // %bb.0:
244-
; CHECK-NEXT: sxth w8, w1
245-
; CHECK-NEXT: sxth w9, w0
246-
; CHECK-NEXT: cmp w9, w8
247-
; CHECK-NEXT: csel w10, w9, w8, lt
248-
; CHECK-NEXT: csel w8, w9, w8, gt
249-
; CHECK-NEXT: sub w0, w10, w8
241+
; CHECK-NEXT: sxth w8, w0
242+
; CHECK-NEXT: subs w8, w8, w1, sxth
243+
; CHECK-NEXT: cneg w0, w8, pl
250244
; CHECK-NEXT: ret
251245
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
252246
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -257,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
257251
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
258252
; CHECK-LABEL: abd_minmax_i32:
259253
; CHECK: // %bb.0:
260-
; CHECK-NEXT: cmp w0, w1
261-
; CHECK-NEXT: csel w8, w0, w1, lt
262-
; CHECK-NEXT: csel w9, w0, w1, gt
263-
; CHECK-NEXT: sub w0, w8, w9
254+
; CHECK-NEXT: subs w8, w0, w1
255+
; CHECK-NEXT: cneg w0, w8, ge
264256
; CHECK-NEXT: ret
265257
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
266258
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -271,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
271263
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
272264
; CHECK-LABEL: abd_minmax_i64:
273265
; CHECK: // %bb.0:
274-
; CHECK-NEXT: cmp x0, x1
275-
; CHECK-NEXT: csel x8, x0, x1, lt
276-
; CHECK-NEXT: csel x9, x0, x1, gt
277-
; CHECK-NEXT: sub x0, x8, x9
266+
; CHECK-NEXT: subs x8, x0, x1
267+
; CHECK-NEXT: cneg x0, x8, ge
278268
; CHECK-NEXT: ret
279269
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
280270
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -445,7 +435,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
445435
; CHECK-LABEL: abd_subnsw_i32:
446436
; CHECK: // %bb.0:
447437
; CHECK-NEXT: subs w8, w0, w1
448-
; CHECK-NEXT: cneg w0, w8, pl
438+
; CHECK-NEXT: cneg w0, w8, ge
449439
; CHECK-NEXT: ret
450440
%sub = sub nsw i32 %a, %b
451441
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -457,7 +447,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
457447
; CHECK-LABEL: abd_subnsw_i32_undef:
458448
; CHECK: // %bb.0:
459449
; CHECK-NEXT: subs w8, w0, w1
460-
; CHECK-NEXT: cneg w0, w8, pl
450+
; CHECK-NEXT: cneg w0, w8, ge
461451
; CHECK-NEXT: ret
462452
%sub = sub nsw i32 %a, %b
463453
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -469,7 +459,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
469459
; CHECK-LABEL: abd_subnsw_i64:
470460
; CHECK: // %bb.0:
471461
; CHECK-NEXT: subs x8, x0, x1
472-
; CHECK-NEXT: cneg x0, x8, pl
462+
; CHECK-NEXT: cneg x0, x8, ge
473463
; CHECK-NEXT: ret
474464
%sub = sub nsw i64 %a, %b
475465
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -481,7 +471,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
481471
; CHECK-LABEL: abd_subnsw_i64_undef:
482472
; CHECK: // %bb.0:
483473
; CHECK-NEXT: subs x8, x0, x1
484-
; CHECK-NEXT: cneg x0, x8, pl
474+
; CHECK-NEXT: cneg x0, x8, ge
485475
; CHECK-NEXT: ret
486476
%sub = sub nsw i64 %a, %b
487477
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)

0 commit comments

Comments
 (0)