-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[ARM][AArch64] Custom Lower abds and abdu on ARM and AArch64 #155884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-aarch64 Author: AZero13 (AZero13) Changes: They are grouped together because they are basically almost the same. Patch is 202.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155884.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c80bac02f41af..d23a7da58364f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i64, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7200,6 +7205,38 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}
+// Generate SUBS and CNEG for absolute difference.
+SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) {
+ if (Op.getOpcode() == ISD::ABDS)
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ else
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ AArch64CC::CondCode CC = (Op.getOpcode() == ISD::ABDS) ? AArch64CC::PL : AArch64CC::HS;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
+ getCondCode(DAG, CC), Cmp.getValue(1));
+}
+
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -7649,9 +7686,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6c6ae782f779f..20405653bb5fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4a158ef5bcae0..a88a21d25d33c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5625,81 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate SUBS and CSEL for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ // Thumb1-only sequence:
+ // asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ SDValue X = Op.getOperand(0);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, X, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ }
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
+// Generate SUBS and CNEG for absolute difference.
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Thumb1-only custom sequences for i32
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ if (Op.getOpcode() == ISD::ABDS) {
+ // subs r0, r0, r1; asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ SDValue D = DAG.getNode(ISD::SUB, DL, VT, LHS, RHS);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, D, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, D, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ } else {
+ // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ SDValue Sbc1 = DAG.getNode(
+ ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1);
+
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0));
+
+ return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0));
+ }
+ }
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ ARMCC::CondCodes CC = (Op.getOpcode() == ISD::ABDS) ? ARMCC::MI : ARMCC::LO;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Cmp.getValue(0), Neg,
+ DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10670,6 +10749,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU:
+ return LowerABD(Op, DAG);
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0185c8ddd4928..878d1ade096dc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -888,6 +888,9 @@ class VectorType;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..7f879413cf449 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -226,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w1
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -242,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -258,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -272,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -343,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -356,7 +345,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..e461a747243a4 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -236,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -248,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -308,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -321,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -551,7 +550,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -564,7 +563,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..cc24bdc9e5c2d 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -26,7 +26,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@@ -42,7 +42,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -58,7 +58,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -73,7 +73,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -90,7 +90,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
...
[truncated]
|
|
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: They are grouped together because they are basically almost the same. Patch is 202.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155884.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c80bac02f41af..d23a7da58364f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -698,6 +698,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i64, Custom);
}
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i64, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i64, Custom);
+
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -7200,6 +7205,38 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
}
+// Generate SUBS and CNEG for absolute difference.
+SDValue AArch64TargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ if (VT.isVector()) {
+ if (Op.getOpcode() == ISD::ABDS)
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+ else
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ }
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
+ LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ AArch64CC::CondCode CC = (Op.getOpcode() == ISD::ABDS) ? AArch64CC::PL : AArch64CC::HS;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, Cmp.getValue(0), Neg,
+ getCondCode(DAG, CC), Cmp.getValue(1));
+}
+
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -7649,9 +7686,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
+ return LowerABD(Op, DAG);
case ISD::AVGFLOORS:
return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6c6ae782f779f..20405653bb5fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -598,6 +598,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4a158ef5bcae0..a88a21d25d33c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -647,6 +647,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5621,6 +5625,81 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate SUBS and CSEL for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ // Thumb1-only sequence:
+ // asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ SDValue X = Op.getOperand(0);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, X, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ }
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Op.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, DAG.getVTList(VT, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(0, DL, VT));
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
+// Generate SUBS and CNEG for absolute difference.
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Thumb1-only custom sequences for i32
+ if (Subtarget->isThumb1Only() && VT == MVT::i32) {
+ if (Op.getOpcode() == ISD::ABDS) {
+ // subs r0, r0, r1; asrs r1, r0, #31; eors r0, r1; subs r0, r0, r1
+ SDValue D = DAG.getNode(ISD::SUB, DL, VT, LHS, RHS);
+ SDValue ShiftAmt = DAG.getConstant(31, DL, MVT::i32);
+ SDValue S = DAG.getNode(ISD::SRA, DL, VT, D, ShiftAmt);
+ SDValue T = DAG.getNode(ISD::XOR, DL, VT, D, S);
+ return DAG.getNode(ISD::SUB, DL, VT, T, S);
+ } else {
+ // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ SDValue Sbc1 = DAG.getNode(
+ ARMISD::SUBE, DL, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, Flags1);
+
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0));
+
+ return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0));
+ }
+ }
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, MVT::i32), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Cmp.getValue(0));
+
+ // For unsigned: use HS (a >= b) to select a-b, otherwise b-a
+ // For signed: use GE (a >= b) to select a-b, otherwise b-a
+ ARMCC::CondCodes CC = (Op.getOpcode() == ISD::ABDS) ? ARMCC::MI : ARMCC::LO;
+
+ // CSEL: if a > b, select a-b, otherwise b-a
+ return DAG.getNode(ARMISD::CMOV, DL, VT, Cmp.getValue(0), Neg,
+ DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10670,6 +10749,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG, Subtarget);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU:
+ return LowerABD(Op, DAG);
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0185c8ddd4928..878d1ade096dc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -888,6 +888,9 @@ class VectorType;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+
+ SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerABD(SDValue Op, SelectionDAG &DAG) const;
void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const;
SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll
index 02c76ba7343a0..7f879413cf449 100644
--- a/llvm/test/CodeGen/AArch64/abds-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abds-neg.ll
@@ -73,8 +73,8 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: subs w8, w1, w8
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w8, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -104,8 +104,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -119,9 +119,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w8, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -135,8 +134,8 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs w8, w1, w0
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -150,8 +149,8 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -165,8 +164,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: subs x8, x1, x0
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -226,12 +225,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: abd_minmax_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w1
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxtb
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
%max = call i8 @llvm.smax.i8(i8 %a, i8 %b)
@@ -242,12 +238,9 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_minmax_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w10, w9, w8, lt
-; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: sub w0, w10, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: subs w8, w8, w1, sxth
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
%max = call i16 @llvm.smax.i16(i16 %a, i16 %b)
@@ -258,10 +251,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: csel w8, w0, w1, lt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w0, w8, w9
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -272,10 +263,8 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csel x8, x0, x1, lt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: subs x8, x0, x1
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -343,7 +332,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, ge
+; CHECK-NEXT: cneg w0, w8, pl
; CHECK-NEXT: ret
%cmp = icmp sge i32 %a, %b
%ab = sub i32 %a, %b
@@ -356,7 +345,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, ge
+; CHECK-NEXT: cneg x0, x8, pl
; CHECK-NEXT: ret
%cmp = icmp slt i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index bf52e71ec21fe..e461a747243a4 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -70,7 +70,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: subs w8, w8, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i16 %a to i64
%bext = sext i32 %b to i64
@@ -99,7 +99,7 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -112,9 +112,8 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w1
-; CHECK-NEXT: subs w8, w0, w8
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: subs w8, w0, w1, sxth
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i16 %b to i64
@@ -128,7 +127,7 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i32_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%aext = sext i32 %a to i64
%bext = sext i32 %b to i64
@@ -142,7 +141,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -156,7 +155,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_ext_i64_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%aext = sext i64 %a to i128
%bext = sext i64 %b to i128
@@ -236,7 +235,7 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_minmax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -248,7 +247,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_minmax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -308,7 +307,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_cmp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp slt i32 %a, %b
%ab = sub i32 %a, %b
@@ -321,7 +320,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = sub i64 %a, %b
@@ -551,7 +550,7 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_select_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w0, w8, le
+; CHECK-NEXT: cneg w0, w8, mi
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %a, %b
%ab = select i1 %cmp, i32 %a, i32 %b
@@ -564,7 +563,7 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_select_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, le
+; CHECK-NEXT: cneg x0, x8, mi
; CHECK-NEXT: ret
%cmp = icmp sge i64 %a, %b
%ab = select i1 %cmp, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 400031b64cb84..cc24bdc9e5c2d 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -10,7 +10,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -26,7 +26,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@@ -42,7 +42,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: subs w8, w8, w1, uxtb
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -58,7 +58,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: subs w8, w8, w1, uxth
-; CHECK-NEXT: cneg w0, w8, pl
+; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -73,7 +73,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; CHECK-LABEL: abd_ext_i16_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: subs w8, w1, w8
+; CHECK-NEXT: subs w8, w8, w1
; CHECK-NEXT: cneg w0, w8, hs
; CHECK-NEXT: ret
%aext = zext i16 %a to i64
@@ -90,7 +90,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
efriedma-quic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CC @RKSimon
It looks like the improvements are mostly not from the lowering itself. The lowering is, as far as I can tell, basically what TargetLowering::expandABD would generate anyway, at least on AArch64. The difference is that certain DAGCombines trigger more aggressively.
Given that, we probably should change the conditions for those DAGCombines, instead of adding a bunch of useless custom lowering code.
|
@AZero13 It'd be better if we could determine the general missing DAG fold instead of micro-optimizing with custom lowering like this. |
They are grouped together because they are essentially the same.