Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16726,10 +16726,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
DAG.getConstant(0, DL, XLenVT), CC);
}

// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
Expand All @@ -16749,20 +16745,36 @@ static SDValue performSETCCCombine(SDNode *N,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;

// (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC =
cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
// (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
unsigned ShiftBits = AndRHSC.countr_zero();
SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl, VT));
SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}

// Similar to above but handling the lower 32 bits by using srliw.
// FIXME: Handle the case where N1 is non-zero.
if (OpVT == MVT::i64 && AndRHSC.getZExtValue() <= 0xffffffff &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure I follow, this is handling the case where a 0..01111..0 mask (with the 0..0 being exactly the high 32 bits), by masking out the bottom 32 and then doing the same shift compare as above? And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?

If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A: We already were generating the shift pair sequence, and this is an optimization on that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this happens to fold into a srlw because we only care about the zero-ness of the shift, so extending bit 31 is fine?

srliw is filling zeros to any bits in the result above the original bit 31. I'm not sure if that's what you mean by "extending bit 31"? It would also be correct to use sraiw for this case which would duplicate the original bit 31.

If so, can we generalize this by replacing the AND with a SHL to clear the high bits, and adjusting the SRL amount?

Yes

Copy link
Collaborator Author

@topperc topperc Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A: We already were generating the shift pair sequence, and this is an optimization on that.

We're generating a shift pair sequence for the 32 leading zero case, but I don't think we do for other leading zero amounts. Which I think is what you were suggesting?

A general 0..0111..0 AND mask requires 3 shifts to implement and we don't do that currently. If we know the user is a seteq/setne we can use 2 shifts because we can move the bits to the lsbs of the compare.

isPowerOf2_32(-uint32_t(AndRHSC.getZExtValue()))) {
unsigned ShiftBits = llvm::countr_zero(AndRHSC.getZExtValue());
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, N0.getOperand(0),
DAG.getConstant(0xffffffff, dl, OpVT));
SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, And,
DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}
}

// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the
// sext_inreg can become a sext.w instead of a shift pair.
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();

Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1698,8 +1698,6 @@ let Predicates = [IsRV32] in {
def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
}
let Predicates = [IsRV64] in {
def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
(XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
}
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,37 @@ define i1 @test4(i64 %x) {
%b = icmp eq i64 %a, 0
ret i1 %b
}

define i1 @test5(i64 %x) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you precommit the tests so we can see prior codegen?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you precommit the tests so we can see prior codegen?

They are in separate commits in the PR.

; RV32-LABEL: test5:
; RV32: # %bb.0:
; RV32-NEXT: srli a0, a0, 29
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: ret
;
; RV64-LABEL: test5:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 29
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
%a = and i64 %x, u0xE0000000
%b = icmp eq i64 %a, 0
ret i1 %b
}

; Mask 0xE0000000 keeps only bits 29-31, so the compare reduces to a right
; shift by 29 followed by snez. On RV32 the i64 is split across a register
; pair and the masked bits live in the low half, so a plain srli on a0
; suffices; on RV64 srliw is used, which also zeroes everything above the
; original bit 31 before the snez.
define i1 @test6(i64 %x) {
; RV32-LABEL: test6:
; RV32: # %bb.0:
; RV32-NEXT: srli a0, a0, 29
; RV32-NEXT: snez a0, a0
; RV32-NEXT: ret
;
; RV64-LABEL: test6:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 29
; RV64-NEXT: snez a0, a0
; RV64-NEXT: ret
  %a = and i64 %x, u0xE0000000
  %b = icmp ne i64 %a, 0
  ret i1 %b
}
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/RISCV/bittest.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ define i64 @bittest_31_i64(i64 %a) nounwind {
; RV64ZBS-LABEL: bittest_31_i64:
; RV64ZBS: # %bb.0:
; RV64ZBS-NEXT: srliw a0, a0, 31
; RV64ZBS-NEXT: xori a0, a0, 1
; RV64ZBS-NEXT: seqz a0, a0
; RV64ZBS-NEXT: ret
;
; RV64XTHEADBS-LABEL: bittest_31_i64:
; RV64XTHEADBS: # %bb.0:
; RV64XTHEADBS-NEXT: srliw a0, a0, 31
; RV64XTHEADBS-NEXT: xori a0, a0, 1
; RV64XTHEADBS-NEXT: seqz a0, a0
; RV64XTHEADBS-NEXT: ret
%shr = lshr i64 %a, 31
%not = xor i64 %shr, -1
Expand Down Expand Up @@ -3518,7 +3518,7 @@ define i32 @bittest_31_andeq0_i64(i64 %x) {
; RV64-LABEL: bittest_31_andeq0_i64:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 31
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
%and = and i64 %x, 2147483648
%cmp = icmp eq i64 %and, 0
Expand Down
Loading