-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[ARM][AArch64] Custom Lower abds and abdu on ARM and AArch64 #155884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-aarch64 Author: AZero13 (AZero13) Changes: They are grouped together because they are basically almost the same. Patch is 202.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155884.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c80bac02f41af..d23a7da58364f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i64, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7200,6 +7205,38 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}
+// Generate SUBS and CNEG for absolute difference.
+SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) {
+ if (Op.getOpcode() == ISD::ABDS)
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ else
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ AArch64CC::CondCode CC = (Op.getOpcode() == ISD::ABDS) ? AArch64CC::PL : AArch64CC::HS;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
+ getCondCode(DAG, CC), Cmp.getValue(1));
+}
+
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -7649,9 +7686,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6c6ae782f779f..20405653bb5fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4a158ef5bcae0..a88a21d25d33c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5625,81 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate SUBS and CSEL for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ // Thumb1-only sequence:
+ // asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ SDValue X = Op.getOperand(0);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, X, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ }
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
+// Generate SUBS and CNEG for absolute difference.
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Thumb1-only custom sequences for i32
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ if (Op.getOpcode() == ISD::ABDS) {
+ // subs r0, r0, r1; asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ SDValue D = DAG.getNode(ISD::SUB, DL, VT, LHS, RHS);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, D, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, D, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ } else {
+ // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ SDValue Sbc1 = DAG.getNode(
+ ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1);
+
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0));
+
+ return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0));
+ }
+ }
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ ARMCC::CondCodes CC = (Op.getOpcode() == ISD::ABDS) ? ARMCC::MI : ARMCC::LO;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Cmp.getValue(0), Neg,
+ DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10670,6 +10749,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU:
+ return LowerABD(Op, DAG);
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0185c8ddd4928..878d1ade096dc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -888,6 +888,9 @@ class VectorType;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..7f879413cf449 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -226,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w1
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -242,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -258,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -272,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -343,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -356,7 +345,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..e461a747243a4 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -236,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -248,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -308,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -321,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -551,7 +550,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -564,7 +563,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..cc24bdc9e5c2d 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -26,7 +26,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@@ -42,7 +42,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -58,7 +58,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -73,7 +73,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -90,7 +90,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
...
[truncated]
|
|
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: They are grouped together because they are basically almost the same. Patch is 202.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155884.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c80bac02f41af..d23a7da58364f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i64, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7200,6 +7205,38 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}
+// Generate SUBS and CNEG for absolute difference.
+SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) {
+ if (Op.getOpcode() == ISD::ABDS)
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ else
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ AArch64CC::CondCode CC = (Op.getOpcode() == ISD::ABDS) ? AArch64CC::PL : AArch64CC::HS;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
+ getCondCode(DAG, CC), Cmp.getValue(1));
+}
+
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -7649,9 +7686,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6c6ae782f779f..20405653bb5fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4a158ef5bcae0..a88a21d25d33c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5625,81 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate SUBS and CSEL for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ // Thumb1-only sequence:
+ // asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ SDValue X = Op.getOperand(0);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, X, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ }
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
+// Generate SUBS and CNEG for absolute difference.
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Thumb1-only custom sequences for i32
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ if (Op.getOpcode() == ISD::ABDS) {
+ // subs r0, r0, r1; asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ SDValue D = DAG.getNode(ISD::SUB, DL, VT, LHS, RHS);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, D, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, D, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ } else {
+ // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ SDValue Sbc1 = DAG.getNode(
+ ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1);
+
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0));
+
+ return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0));
+ }
+ }
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ ARMCC::CondCodes CC = (Op.getOpcode() == ISD::ABDS) ? ARMCC::MI : ARMCC::LO;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Cmp.getValue(0), Neg,
+ DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10670,6 +10749,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU:
+ return LowerABD(Op, DAG);
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0185c8ddd4928..878d1ade096dc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -888,6 +888,9 @@ class VectorType;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..7f879413cf449 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -226,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w1
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -242,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -258,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -272,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -343,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -356,7 +345,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..e461a747243a4 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -236,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -248,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -308,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -321,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -551,7 +550,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -564,7 +563,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..cc24bdc9e5c2d 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -26,7 +26,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@@ -42,7 +42,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -58,7 +58,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -73,7 +73,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -90,7 +90,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
efriedma-quic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CC @RKSimon
It looks like the improvements are mostly not from the lowering itself. The lowering is, as far as I can tell, basically what TargetLowering::expandABD would generate anyway, at least on AArch64. The difference is that certain DAGCombines trigger more aggressively.
Given that, we probably should change the conditions for those DAGCombines, instead of adding a bunch of useless custom lowering code.
|
@AZero13 It'd be better if we could determine the general missing DAG fold instead of micro-optimizing with custom lowering like this. |
They are grouped together because they are essentially the same.