Skip to content

Conversation

@ecnelises
Copy link
Member

@ecnelises ecnelises commented Sep 21, 2023

InstCombine does below transformation:

; bool cmp(i64 a) { return a != (i64)(iN)a; }
%0 = add i64 %a, -2^(N-1)
%1 = icmp ult i64 %0, -2^N
ret i1 %1

; bool cmp(i64 a) { return a == (i64)(iN)a; }
%0 = add i64 %a, 2^(N-1)
%1 = icmp ult i64 %0, 2^N
ret i1 %1

On PowerPC, it's profitable to combine this add-cmpult back to original trunc-ext-cmpeq.

if (LHS.getOpcode() == ISD::ADD && isa<ConstantSDNode>(LHS.getOperand(1))) {
uint64_t Addend = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
if (OpVT == MVT::i64) {
// (a-2^(M-1)) => sext(trunc(a, M), 64)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When is this equation hold?

@bzEq
Copy link
Collaborator

bzEq commented Jan 29, 2024

Please provide more description in PR summary.

@ecnelises
Copy link
Member Author

The patterns are

; ult (add x -0x80000000) -0x100000000 -> ne x (sext:i64 (trunc:i32 x))
; ult (add x -0x8000) -0x10000 -> ne x (sext:i64 (trunc:i16 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i64 (trunc:i8 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i32 (trunc:i16 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i16 (trunc:i8 x))
; ult (srl (add x -0x8000) 16) 0xffff -> ne x (sext:i32 (trunc:i16 x))

Only the last one differs from the rest, because it's been transformed in generic DAG combiner. (https://alive2.llvm.org/ce/z/jAHgKM ) The rest are just reverting transformation of InstCombine.

@github-actions
Copy link

github-actions bot commented Apr 3, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will investigate about the regression.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not be regression. Parameter should have signext.

@llvmbot
Copy link
Member

llvmbot commented Apr 10, 2024

@llvm/pr-subscribers-backend-powerpc

Author: Qiu Chaofan (ecnelises)

Changes

InstCombine does below transformation:

; bool cmp(i64 a) { return a != (i64)(iN)a; }
%0 = add i64 %a, -2^(N-1)
%1 = icmp ult i64 %0, -2^N
ret i1 %1

; bool cmp(i64 a) { return a == (i64)(iN)a; }
%0 = add i64 %a, 2^(N-1)
%1 = icmp ult i64 %0, 2^N
ret i1 %1

On PowerPC, it's profitable to combine this add-cmpult back to original trunc-ext-cmpeq.


Full diff: https://github.com/llvm/llvm-project/pull/66978.diff

2 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+47-10)
  • (modified) llvm/test/CodeGen/PowerPC/setcc-to-sub.ll (+30-38)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 43e4a34a9b3483..b13d603efb9479 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14527,15 +14527,18 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
       ShiftCst);
 }
 
-SDValue PPCTargetLowering::combineSetCC(SDNode *N,
-                                        DAGCombinerInfo &DCI) const {
-  assert(N->getOpcode() == ISD::SETCC &&
-         "Should be called with a SETCC node");
+SDValue PPCTargetLowering::combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
 
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDLoc DL(N);
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  EVT OpVT = LHS.getValueType();
+
   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
-    SDValue LHS = N->getOperand(0);
-    SDValue RHS = N->getOperand(1);
 
     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
@@ -14546,15 +14549,49 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
     // x != 0-y --> x+y != 0
     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
         RHS.hasOneUse()) {
-      SDLoc DL(N);
-      SelectionDAG &DAG = DCI.DAG;
-      EVT VT = N->getValueType(0);
-      EVT OpVT = LHS.getValueType();
       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
     }
   }
 
+  if (CC == ISD::SETULT) {
+    auto GetTruncExtCmp = [&](SDValue Src, EVT DstVT) {
+      return DAG.getSetCC(
+          DL, VT, Src,
+          DAG.getSExtOrTrunc(DAG.getSExtOrTrunc(Src, DL, DstVT), DL, OpVT),
+          ISD::SETNE);
+    };
+    // ult (add x -0x80000000) -0x100000000 -> ne x (sext:i64 (trunc:i32 x))
+    // ult (add x -0x8000) -0x10000 -> ne x (sext:i64 (trunc:i16 x))
+    // ult (add x -0x80) -0x100 -> ne x (sext:i64/i32/i16 (trunc:i8/i16/i8 x))
+    if (LHS.getOpcode() == ISD::ADD) {
+      const auto *Addend = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+      const auto *RhsC = dyn_cast<ConstantSDNode>(RHS);
+      if (Addend && RhsC) {
+        int64_t AddendVal = Addend->getSExtValue();
+        int64_t RhsVal = RhsC->getSExtValue();
+        if (AddendVal == -0x80000000L && RhsVal == -0x100000000L &&
+            OpVT == MVT::i64)
+          return GetTruncExtCmp(LHS.getOperand(0), MVT::i32);
+        if (AddendVal == -0x8000 && RhsVal == -0x10000 && OpVT == MVT::i64)
+          return GetTruncExtCmp(LHS.getOperand(0), MVT::i16);
+        if (AddendVal == -0x80 && RhsVal == -0x100 &&
+            (OpVT == MVT::i64 || OpVT == MVT::i32 || OpVT == MVT::i16))
+          return GetTruncExtCmp(LHS.getOperand(0), MVT::i8);
+      }
+    // ult (srl (add x -0x8000) 16) 0xffff -> ne x (sext:i32 (trunc:i16 x))
+    } else if (LHS.getOpcode() == ISD::SRL &&
+               LHS.getOperand(0).getOpcode() == ISD::ADD) {
+      const auto *SrlAmt = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+      const auto *Addend =
+          dyn_cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1));
+      const auto *RhsC = dyn_cast<ConstantSDNode>(RHS);
+      if (SrlAmt && Addend && RhsC && SrlAmt->getSExtValue() == 16 &&
+          Addend->getSExtValue() == -0x8000 && RhsC->getSExtValue() == 0xffff)
+        return GetTruncExtCmp(LHS.getOperand(0).getOperand(0), MVT::i8);
+    }
+  }
+
   return DAGCombineTruncBoolExt(N, DCI);
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll b/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
index 20dcb8ccf4908a..cb6170ab175e7e 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -92,12 +92,10 @@ entry:
 define zeroext i1 @test5(i64 %a) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    li 4, -1
-; CHECK-NEXT:    addis 3, 3, -32768
-; CHECK-NEXT:    rldic 4, 4, 32, 0
-; CHECK-NEXT:    subc 4, 3, 4
-; CHECK-NEXT:    subfe 3, 3, 3
-; CHECK-NEXT:    neg 3, 3
+; CHECK-NEXT:    extsw 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    addic 4, 3, -1
+; CHECK-NEXT:    subfe 3, 4, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i64 %a, -2147483648
@@ -108,11 +106,10 @@ entry:
 define zeroext i1 @test6(i64 %a) {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 3, 3, -32768
-; CHECK-NEXT:    lis 4, -1
-; CHECK-NEXT:    subc 4, 3, 4
-; CHECK-NEXT:    subfe 3, 3, 3
-; CHECK-NEXT:    neg 3, 3
+; CHECK-NEXT:    extsh 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    addic 4, 3, -1
+; CHECK-NEXT:    subfe 3, 4, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i64 %a, -32768
@@ -123,11 +120,10 @@ entry:
 define zeroext i1 @test7(i64 %a) {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 3, 3, -128
-; CHECK-NEXT:    li 4, -256
-; CHECK-NEXT:    subc 4, 3, 4
-; CHECK-NEXT:    subfe 3, 3, 3
-; CHECK-NEXT:    neg 3, 3
+; CHECK-NEXT:    extsb 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    addic 4, 3, -1
+; CHECK-NEXT:    subfe 3, 4, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i64 %a, -128
@@ -135,15 +131,14 @@ entry:
   ret i1 %cmp
 }
 
-define zeroext i1 @test8(i32 %a) {
+define zeroext i1 @test8(i32 signext %a) {
 ; CHECK-LABEL: test8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 3, 3, -32768
-; CHECK-NEXT:    lis 4, -1
-; CHECK-NEXT:    rlwinm 3, 3, 16, 16, 31
-; CHECK-NEXT:    ori 4, 4, 1
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-NEXT:    extsb 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i32 %a, -32768
@@ -151,16 +146,14 @@ entry:
   ret i1 %cmp
 }
 
-define zeroext i1 @test9(i32 %a) {
+define zeroext i1 @test9(i32 signext %a) {
 ; CHECK-LABEL: test9:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lis 4, -256
-; CHECK-NEXT:    addi 3, 3, -128
-; CHECK-NEXT:    ori 4, 4, 1
-; CHECK-NEXT:    clrldi 3, 3, 32
-; CHECK-NEXT:    rldic 4, 4, 8, 0
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-NEXT:    extsb 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i32 %a, -128
@@ -168,15 +161,14 @@ entry:
   ret i1 %cmp
 }
 
-define zeroext i1 @test10(i16 %a) {
+define zeroext i1 @test10(i16 signext %a) {
 ; CHECK-LABEL: test10:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi 3, 3, -128
-; CHECK-NEXT:    lis 4, -1
-; CHECK-NEXT:    clrlwi 3, 3, 16
-; CHECK-NEXT:    ori 4, 4, 256
-; CHECK-NEXT:    add 3, 3, 4
-; CHECK-NEXT:    rldicl 3, 3, 1, 63
+; CHECK-NEXT:    extsb 4, 3
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
 entry:
   %0 = add i16 %a, -128

@ecnelises ecnelises closed this Oct 21, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants