Merged

Changes from 2 commits
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (14 additions, 0 deletions)

@@ -3949,6 +3949,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
  if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
    return Result;

  // Similar to the previous rule, but this time targeting an expanded abs.
  // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
Contributor:
Could also do the converse:

(sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))

but I don't know if there's a real need for it.

Member Author:
I think it's fine to add it in this patch. It's done now.
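
Since this view only shows the first two commits, the converse fold the author added is not itself visible here. A minimal sketch of what it could look like, mirroring the committed pattern below and reusing the same SDPatternMatch helpers (m_SMin/m_UMin); the name MaxOpc is illustrative:

  // Hypothetical converse of the committed fold:
  // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
  SDValue X;
  if (LegalOperations &&
      sd_match(N1,
               m_OneUse(m_AnyOf(m_SMin(m_Value(X), m_Neg(m_Deferred(X))),
                                m_UMin(m_Value(X), m_Neg(m_Deferred(X))))))) {
    unsigned MaxOpc = N1->getOpcode() == ISD::SMIN ? ISD::SMAX : ISD::UMAX;
    if (hasOperation(MaxOpc, VT))
      return DAG.getNode(MaxOpc, DL, VT, X,
                         DAG.getNode(ISD::SUB, DL, VT, N0, X));
  }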

  // Note that this is applicable to both signed and unsigned min/max.
  SDValue X;
  if (LegalOperations &&
      sd_match(N1,
               m_OneUse(m_AnyOf(m_SMax(m_Value(X), m_Neg(m_Deferred(X))),
                                m_UMax(m_Value(X), m_Neg(m_Deferred(X))))))) {
    unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
Contributor:
Technically you should check the max is legal, but I doubt in practice the legality of min is different than max.

Member Author (@mshockwave, Dec 23, 2024):
> I doubt in practice the legality of min is different than max

This, and even if only max is illegal, I think it'll be expanded (while min is not), which makes this transformation even more profitable.

Contributor:
It depends on which combiner phase; eventually only legal operations can be emitted.

Member Author:
Ah, if you're referring to the max generated by this rule, I already checked its legality (through NewOpc) below.

    if (hasOperation(MinOpc, VT))
      return DAG.getNode(MinOpc, DL, VT, X,
                         DAG.getNode(ISD::SUB, DL, VT, N0, X));
Contributor:
I think this can preserve flags.

Member Author:
Good catch. I simply capture and reuse the SDValue of the first sub. It's done now.
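
The flag-preserving revision is likewise not part of this two-commit view. A minimal sketch of the capture-and-reuse approach the author describes, assuming SDPatternMatch's m_AllOf to bind the matched inner sub; the name NegX is illustrative:

  // Bind the inner (sub 0, X) to NegX while matching, then return it
  // directly: no new sub node is built, so its flags carry over as-is.
  SDValue X, NegX;
  auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(NegX));
  if (LegalOperations &&
      sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
                                    m_UMax(m_Value(X), NegPat))))) {
    unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
    if (hasOperation(MinOpc, VT))
      return DAG.getNode(MinOpc, DL, VT, X, NegX);
  }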

  }

  // Fold neg(splat(neg(x))) -> splat(x)
  if (VT.isVector()) {
    SDValue N1S = DAG.getSplatValue(N1, true);
llvm/test/CodeGen/RISCV/neg-abs.ll (222 additions, 0 deletions)

@@ -258,3 +258,225 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
  %neg = sub nsw i64 0, %abs
  ret i64 %neg
}

define i32 @expanded_neg_abs32(i32 %x) {
; RV32I-LABEL: expanded_neg_abs32:
; RV32I: # %bb.0:
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: blt a0, a1, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: neg a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: expanded_neg_abs32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: min a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs32:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: blt a1, a0, .LBB6_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: expanded_neg_abs32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a0
; RV64ZBB-NEXT: negw a0, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: negw a0, a0
; RV64ZBB-NEXT: ret
  %n = sub i32 0, %x
  %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)
  %r = sub i32 0, %t
  ret i32 %r
}

define i32 @expanded_neg_abs32_unsigned(i32 %x) {
; RV32I-LABEL: expanded_neg_abs32_unsigned:
; RV32I: # %bb.0:
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: bltu a0, a1, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: neg a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: expanded_neg_abs32_unsigned:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs32_unsigned:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: bltu a1, a0, .LBB7_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: negw a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: expanded_neg_abs32_unsigned:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a1, a0
; RV64ZBB-NEXT: negw a0, a0
; RV64ZBB-NEXT: maxu a0, a0, a1
; RV64ZBB-NEXT: negw a0, a0
; RV64ZBB-NEXT: ret
  %n = sub i32 0, %x
  %t = call i32 @llvm.umax.i32(i32 %n, i32 %x)
  %r = sub i32 0, %t
  ret i32 %r
}

define i64 @expanded_neg_abs64(i64 %x) {
; RV32I-LABEL: expanded_neg_abs64:
; RV32I: # %bb.0:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: neg a3, a1
; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: neg a3, a0
; RV32I-NEXT: beq a2, a1, .LBB8_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt a4, a1, a2
; RV32I-NEXT: beqz a4, .LBB8_3
; RV32I-NEXT: j .LBB8_4
; RV32I-NEXT: .LBB8_2:
; RV32I-NEXT: sltu a4, a0, a3
; RV32I-NEXT: bnez a4, .LBB8_4
; RV32I-NEXT: .LBB8_3:
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: mv a3, a0
; RV32I-NEXT: .LBB8_4:
; RV32I-NEXT: snez a0, a3
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: neg a0, a3
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: expanded_neg_abs64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: neg a3, a1
; RV32ZBB-NEXT: sub a2, a3, a2
; RV32ZBB-NEXT: neg a3, a0
; RV32ZBB-NEXT: beq a2, a1, .LBB8_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt a4, a1, a2
; RV32ZBB-NEXT: beqz a4, .LBB8_3
; RV32ZBB-NEXT: j .LBB8_4
; RV32ZBB-NEXT: .LBB8_2:
; RV32ZBB-NEXT: sltu a4, a0, a3
; RV32ZBB-NEXT: bnez a4, .LBB8_4
; RV32ZBB-NEXT: .LBB8_3:
; RV32ZBB-NEXT: mv a2, a1
; RV32ZBB-NEXT: mv a3, a0
; RV32ZBB-NEXT: .LBB8_4:
; RV32ZBB-NEXT: snez a0, a3
; RV32ZBB-NEXT: add a0, a2, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: neg a0, a3
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs64:
; RV64I: # %bb.0:
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: blt a0, a1, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: neg a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: expanded_neg_abs64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: min a0, a0, a1
; RV64ZBB-NEXT: ret
  %n = sub i64 0, %x
  %t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
  %r = sub i64 0, %t
  ret i64 %r
}

define i64 @expanded_neg_abs64_unsigned(i64 %x) {
; RV32I-LABEL: expanded_neg_abs64_unsigned:
; RV32I: # %bb.0:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: neg a3, a1
; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: neg a3, a0
; RV32I-NEXT: beq a2, a1, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a1, a2
; RV32I-NEXT: beqz a4, .LBB9_3
; RV32I-NEXT: j .LBB9_4
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: sltu a4, a0, a3
; RV32I-NEXT: bnez a4, .LBB9_4
; RV32I-NEXT: .LBB9_3:
; RV32I-NEXT: mv a2, a1
; RV32I-NEXT: mv a3, a0
; RV32I-NEXT: .LBB9_4:
; RV32I-NEXT: snez a0, a3
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: neg a0, a3
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: expanded_neg_abs64_unsigned:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: neg a3, a1
; RV32ZBB-NEXT: sub a2, a3, a2
; RV32ZBB-NEXT: neg a3, a0
; RV32ZBB-NEXT: beq a2, a1, .LBB9_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: sltu a4, a1, a2
; RV32ZBB-NEXT: beqz a4, .LBB9_3
; RV32ZBB-NEXT: j .LBB9_4
; RV32ZBB-NEXT: .LBB9_2:
; RV32ZBB-NEXT: sltu a4, a0, a3
; RV32ZBB-NEXT: bnez a4, .LBB9_4
; RV32ZBB-NEXT: .LBB9_3:
; RV32ZBB-NEXT: mv a2, a1
; RV32ZBB-NEXT: mv a3, a0
; RV32ZBB-NEXT: .LBB9_4:
; RV32ZBB-NEXT: snez a0, a3
; RV32ZBB-NEXT: add a0, a2, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: neg a0, a3
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: expanded_neg_abs64_unsigned:
; RV64I: # %bb.0:
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: bltu a0, a1, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: .LBB9_2:
; RV64I-NEXT: neg a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
  %n = sub i64 0, %x
  %t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
  %r = sub i64 0, %t
  ret i64 %r
}