-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[RISCV] Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc)). #154206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This helps the 3 vendor extensions that make sext_inreg i1 legal. I'm delaying this until after LegalizeDAG since we normally have sext_inreg i1 up until LegalizeDAG turns it into and+neg. I also delayed the recently added (sext_inreg (xor (setcc), -1), i1) combine. Though the xor isn't likely to appear before LegalizeDAG anyway.
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: This helps the 3 vendor extensions that make sext_inreg i1 legal. I'm delaying this until after LegalizeDAG since we normally have sext_inreg i1 up until LegalizeDAG turns it into and+neg. I also delayed the recently added (sext_inreg (xor (setcc), -1), i1) combine. Though the xor isn't likely to appear before LegalizeDAG anyway. Full diff: https://github.com/llvm/llvm-project/pull/154206.diff 6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ce03818b49502..a22ccda48dc82 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16639,33 +16639,38 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
}
static SDValue
-performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
unsigned Opc = Src.getOpcode();
+ SDLoc DL(N);
// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
// Don't do this with Zhinx. We need to explicitly sign extend the GPR.
if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
Subtarget.hasStdExtZfhmin())
- return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
- Src.getOperand(0));
+ return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
// Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
- return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
+ return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
Src.getOperand(1));
+ // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc)
+ if (Opc == ISD::SETCC && DCI.isAfterLegalizeDAG())
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+
// Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
isAllOnesConstant(Src.getOperand(1)) &&
- Src.getOperand(0).getOpcode() == ISD::SETCC)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, Src.getOperand(0),
- DAG.getAllOnesConstant(SDLoc(N), VT));
+ Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
+ return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
return SDValue();
}
@@ -20088,7 +20093,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC:
return performSETCCCombine(N, DAG, Subtarget);
case ISD::SIGN_EXTEND_INREG:
- return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
+ return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
diff --git a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
index 6f1d168358e2e..f93d7e093c696 100644
--- a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
@@ -377,6 +377,32 @@ define zeroext i8 @sexti1_i32_setcc(i32 signext %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+nds.bfos instead of snez+addi
+define i32 @sexti1_i32_setcc_2(i32 %a, i32 %b) {
+; CHECK-LABEL: sexti1_i32_setcc_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
+ %icmp = icmp eq i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+; Make sure we don't use nds.bfos instead of neg.
+define i32 @sexti1_i32_setcc_3(i32 %a, i32 %b) {
+; CHECK-LABEL: sexti1_i32_setcc_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt a1, a0, a1
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
+ %icmp = icmp slt i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
define i32 @sexti8_i32(i32 %a) {
; CHECK-LABEL: sexti8_i32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 784f08ca616cc..0ccae67753f6b 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -334,6 +334,44 @@ define zeroext i8 @sexti1_i32_setcc(i32 signext %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+th.ext instead of snez+addi
+define i32 @sexti1_i32_setcc_2(i32 %a, i32 %b) {
+; RV32I-LABEL: sexti1_i32_setcc_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexti1_i32_setcc_2:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: xor a0, a0, a1
+; RV32XTHEADBB-NEXT: snez a0, a0
+; RV32XTHEADBB-NEXT: addi a0, a0, -1
+; RV32XTHEADBB-NEXT: ret
+ %icmp = icmp eq i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+; Make sure we don't use th.ext instead of neg.
+define i32 @sexti1_i32_setcc_3(i32 %a, i32 %b) {
+; RV32I-LABEL: sexti1_i32_setcc_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: sexti1_i32_setcc_3:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slt a0, a0, a1
+; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: ret
+ %icmp = icmp slt i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
define i32 @sextb_i32(i32 %a) nounwind {
; RV32I-LABEL: sextb_i32:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
index 406e5247ae0dd..225495817ae16 100644
--- a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
@@ -290,6 +290,31 @@ define zeroext i8 @sexti1_i32_setcc(i32 signext %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+nds.bfos instead of snez+addi
+define signext i32 @sexti1_i32_setcc_2(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: sexti1_i32_setcc_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
+ %icmp = icmp eq i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+; Make sure we don't use nds.bfos instead of neg.
+define signext i32 @sexti1_i32_setcc_3(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: sexti1_i32_setcc_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt a0, a0, a1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %icmp = icmp slt i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
define signext i32 @sexti8_i32(i32 signext %a) {
; CHECK-LABEL: sexti8_i32:
; CHECK: # %bb.0:
@@ -360,6 +385,31 @@ define zeroext i8 @sexti1_i64_setcc(i64 %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+nds.bfos instead of snez+addi
+define i64 @sexti1_i64_setcc_2(i64 %a, i64 %b) {
+; CHECK-LABEL: sexti1_i64_setcc_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: ret
+ %icmp = icmp eq i64 %a, %b
+ %sext = sext i1 %icmp to i64
+ ret i64 %sext
+}
+
+; Make sure we don't use nds.bfos instead of neg.
+define i64 @sexti1_i64_setcc_3(i64 %a, i64 %b) {
+; CHECK-LABEL: sexti1_i64_setcc_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slt a0, a0, a1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %icmp = icmp slt i64 %a, %b
+ %sext = sext i1 %icmp to i64
+ ret i64 %sext
+}
+
define i64 @sexti8_i64(i64 %a) {
; CHECK-LABEL: sexti8_i64:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index c7902342f7f03..536217abd06c6 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -655,6 +655,44 @@ define zeroext i8 @sexti1_i32_setcc(i32 signext %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+th.ext instead of snez+addi
+define signext i32 @sexti1_i32_setcc_2(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: sexti1_i32_setcc_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexti1_i32_setcc_2:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: snez a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: ret
+ %icmp = icmp eq i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+; Make sure we don't use th.ext instead of neg.
+define signext i32 @sexti1_i32_setcc_3(i32 signext %a, i32 signext %b) {
+; RV64I-LABEL: sexti1_i32_setcc_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexti1_i32_setcc_3:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: slt a0, a0, a1
+; RV64XTHEADBB-NEXT: neg a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %icmp = icmp slt i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
define i64 @sexti1_i64(i64 %a) nounwind {
; RV64I-LABEL: sexti1_i64:
; RV64I: # %bb.0:
@@ -706,6 +744,44 @@ define zeroext i8 @sexti1_i64_setcc(i64 %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+th.ext instead of snez+addi
+define i64 @sexti1_i64_setcc_2(i64 %a, i64 %b) {
+; RV64I-LABEL: sexti1_i64_setcc_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexti1_i64_setcc_2:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: xor a0, a0, a1
+; RV64XTHEADBB-NEXT: snez a0, a0
+; RV64XTHEADBB-NEXT: addi a0, a0, -1
+; RV64XTHEADBB-NEXT: ret
+ %icmp = icmp eq i64 %a, %b
+ %sext = sext i1 %icmp to i64
+ ret i64 %sext
+}
+
+; Make sure we don't use th.ext instead of neg.
+define i64 @sexti1_i64_setcc_3(i64 %a, i64 %b) {
+; RV64I-LABEL: sexti1_i64_setcc_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slt a0, a0, a1
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: sexti1_i64_setcc_3:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: slt a0, a0, a1
+; RV64XTHEADBB-NEXT: neg a0, a0
+; RV64XTHEADBB-NEXT: ret
+ %icmp = icmp slt i64 %a, %b
+ %sext = sext i1 %icmp to i64
+ ret i64 %sext
+}
+
define signext i32 @sextb_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sextb_i32:
; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll
index fc3d8fe54602a..c2c9d077b2526 100644
--- a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll
+++ b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll
@@ -74,6 +74,56 @@ define zeroext i8 @sexti1_i32_setcc(i32 signext %a) {
ret i8 %sext
}
+; Make sure we don't use seqz+qc.ext instead of snez+addi
+define i32 @sexti1_i32_setcc_2(i32 %a, i32 %b) {
+; RV32I-LABEL: sexti1_i32_setcc_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: ret
+;
+; RV32XQCIBM-LABEL: sexti1_i32_setcc_2:
+; RV32XQCIBM: # %bb.0:
+; RV32XQCIBM-NEXT: xor a0, a0, a1
+; RV32XQCIBM-NEXT: snez a0, a0
+; RV32XQCIBM-NEXT: addi a0, a0, -1
+; RV32XQCIBM-NEXT: ret
+;
+; RV32XQCIBMZBB-LABEL: sexti1_i32_setcc_2:
+; RV32XQCIBMZBB: # %bb.0:
+; RV32XQCIBMZBB-NEXT: xor a0, a0, a1
+; RV32XQCIBMZBB-NEXT: snez a0, a0
+; RV32XQCIBMZBB-NEXT: addi a0, a0, -1
+; RV32XQCIBMZBB-NEXT: ret
+ %icmp = icmp eq i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
+
+; Make sure we don't use qc.ext instead of neg.
+define i32 @sexti1_i32_setcc_3(i32 %a, i32 %b) {
+; RV32I-LABEL: sexti1_i32_setcc_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slt a0, a0, a1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCIBM-LABEL: sexti1_i32_setcc_3:
+; RV32XQCIBM: # %bb.0:
+; RV32XQCIBM-NEXT: slt a0, a0, a1
+; RV32XQCIBM-NEXT: neg a0, a0
+; RV32XQCIBM-NEXT: ret
+;
+; RV32XQCIBMZBB-LABEL: sexti1_i32_setcc_3:
+; RV32XQCIBMZBB: # %bb.0:
+; RV32XQCIBMZBB-NEXT: slt a0, a0, a1
+; RV32XQCIBMZBB-NEXT: neg a0, a0
+; RV32XQCIBMZBB-NEXT: ret
+ %icmp = icmp slt i32 %a, %b
+ %sext = sext i1 %icmp to i32
+ ret i32 %sext
+}
define i32 @sexti8_i32(i8 %a) nounwind {
; RV32I-LABEL: sexti8_i32:
|
lenary
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| ; CHECK-NEXT: li a0, 0 | ||
| ; CHECK-NEXT: sub a0, a0, a1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks a bit strange, I'm not sure why it isn't using x0 here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is strange. I'll take a look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test uses -O0 for some reason.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh. Ok, well, that's something.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Particularly weird when the rv64 one doesn't. Been that way since both were added in c78e6bb.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll look at removing it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like it was done to bias which branch instruction gets emitted for the bbc, bbs, beqc, select_beqc, bnec, and select_bnec tests
Co-authored-by: Sam Elliott <[email protected]>
lenary
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| ; CHECK-NEXT: li a0, 0 | ||
| ; CHECK-NEXT: sub a0, a0, a1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh. Ok, well, that's something.
This helps the 3 vendor extensions that make sext_inreg i1 legal.
I'm delaying this until after LegalizeDAG since we normally have
sext_inreg i1 up until LegalizeDAG turns it into and+neg.
I also delayed the recently added (sext_inreg (xor (setcc), -1), i1) combine.
Though the xor isn't likely to appear before LegalizeDAG anyway.