-
Notifications
You must be signed in to change notification settings - Fork 15.3k
DAGCombiner: Support fmaximum/fminimum and fmaximumnum/fminimumnum #137318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64 Author: YunQiang Su (wzssyqa) Changes: Some architectures, like RISC-V, support the new fmax/fmin instructions introduced by IEEE 754-2019. We can use them in `getMinMaxOpcodeForFP`. Patch is 41.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137318.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 282dc4470238d..79f626bf88c1e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6333,60 +6333,70 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
}
-// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
-static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
- ISD::CondCode CC, unsigned OrAndOpcode,
- SelectionDAG &DAG,
- bool isFMAXNUMFMINNUM_IEEE,
- bool isFMAXNUMFMINNUM) {
- // The optimization cannot be applied for all the predicates because
- // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
- // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
- // applied at all if one of the operands is a signaling NaN.
-
- // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
- // are non NaN values.
- if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
- return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMINNUM_IEEE
- : ISD::DELETED_NODE;
- else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETLT || CC == ISD::SETLE) &&
- (OrAndOpcode == ISD::AND)))
- return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMAXNUM_IEEE
- : ISD::DELETED_NODE;
- // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
- // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
- // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
- // that there are not any sNaNs, then the optimization is not valid
- // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
- // the optimization using FMINNUM/FMAXNUM for the following cases. If
- // we can prove that we do not have any sNaNs, then we can do the
- // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
- // cases.
- else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
- (OrAndOpcode == ISD::AND)))
- return isFMAXNUMFMINNUM ? ISD::FMINNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMINNUM_IEEE
- : ISD::DELETED_NODE;
- else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
- (OrAndOpcode == ISD::OR)) ||
- ((CC == ISD::SETULT || CC == ISD::SETULE) &&
- (OrAndOpcode == ISD::AND)))
- return isFMAXNUMFMINNUM ? ISD::FMAXNUM
- : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
- isFMAXNUMFMINNUM_IEEE
- ? ISD::FMAXNUM_IEEE
- : ISD::DELETED_NODE;
+static unsigned
+getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC,
+ unsigned OrAndOpcode, SelectionDAG &DAG,
+ bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM,
+ bool isFMAXIMUMFMINIMUM, bool isFMAXIMUMNUMFMINIMUMNUM) {
+ bool isMax = true;
+ // SETLT/SETLE/SETGT/SETGE are undefined if any Operand is NaN. We
+ // treat them as SETOLT/SETOLE/SETOGT/SETOGE.
+ if (((CC == ISD::SETLT || CC == ISD::SETLE || CC == ISD::SETOLT ||
+ CC == ISD::SETOLE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) && (OrAndOpcode == ISD::AND))) {
+ isMax = false;
+ if (arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE)
+ return ISD::FMINNUM_IEEE;
+ if (arebothOperandsNotSNan(Operand1, Operand2, DAG) && isFMAXNUMFMINNUM)
+ return ISD::FMINNUM;
+ if (isFMAXIMUMNUMFMINIMUMNUM)
+ return ISD::FMINIMUMNUM;
+ } else if (((CC == ISD::SETLT || CC == ISD::SETLE || CC == ISD::SETOLT ||
+ CC == ISD::SETOLE) &&
+ (OrAndOpcode == ISD::AND)) ||
+ ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
+ (OrAndOpcode == ISD::OR))) {
+ isMax = true;
+ if (isFMAXIMUMFMINIMUM)
+ return ISD::FMAXIMUM;
+ } else if (((CC == ISD::SETGT || CC == ISD::SETGE || CC == ISD::SETOGT ||
+ CC == ISD::SETOGE) &&
+ (OrAndOpcode == ISD::OR)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) &&
+ (OrAndOpcode == ISD::AND))) {
+ isMax = true;
+ if (arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
+ isFMAXNUMFMINNUM_IEEE)
+ return ISD::FMAXNUM_IEEE;
+ if (arebothOperandsNotSNan(Operand1, Operand2, DAG) && isFMAXNUMFMINNUM)
+ return ISD::FMAXNUM;
+ if (isFMAXIMUMNUMFMINIMUMNUM)
+ return ISD::FMAXIMUMNUM;
+ } else if (((CC == ISD::SETGT || CC == ISD::SETGE || CC == ISD::SETOGT ||
+ CC == ISD::SETOGE) &&
+ (OrAndOpcode == ISD::AND)) ||
+ ((CC == ISD::SETULT || CC == ISD::SETULE) &&
+ (OrAndOpcode == ISD::OR))) {
+ isMax = false;
+ if (isFMAXIMUMFMINIMUM)
+ return ISD::FMINIMUM;
+ }
+ if (arebothOperandsNotNan(Operand1, Operand2, DAG)) {
+ // Keep this order to help unittest easy:
+ // AArch64 has FMAXNUM_IEEE, while not FMAXIMUMNUM
+ // RISCV64 has FMAXIMUMNUM, while not FMAXNUM_IEEE
+ // Both has FMAXIMUM (RISCV64 has a switch for it)
+ if (isFMAXIMUMFMINIMUM)
+ return isMax ? ISD::FMAXIMUM : ISD::FMINIMUM;
+ if (isFMAXNUMFMINNUM_IEEE)
+ return isMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
+ if (isFMAXIMUMNUMFMINIMUMNUM)
+ return isMax ? ISD::FMAXIMUMNUM : ISD::FMINIMUMNUM;
+ if (isFMAXNUMFMINNUM)
+ return isMax ? ISD::FMAXNUM : ISD::FMINNUM;
+ }
return ISD::DELETED_NODE;
}
@@ -6433,14 +6443,20 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
// predicate of one of the comparisons is the opposite of the other one.
bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
- bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
+ bool isFMAXNUMFMINNUM = TLI.isOperationLegal(ISD::FMAXNUM, OpVT) &&
+ TLI.isOperationLegal(ISD::FMINNUM, OpVT);
+ bool isFMAXIMUMFMINIMUM = TLI.isOperationLegal(ISD::FMAXIMUM, OpVT) &&
+ TLI.isOperationLegal(ISD::FMINIMUM, OpVT);
+ bool isFMAXIMUMNUMFMINIMUMNUM =
+ TLI.isOperationLegal(ISD::FMAXIMUMNUM, OpVT) &&
+ TLI.isOperationLegal(ISD::FMINIMUMNUM, OpVT);
if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
TLI.isOperationLegal(ISD::SMAX, OpVT) &&
TLI.isOperationLegal(ISD::UMIN, OpVT) &&
TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
(OpVT.isFloatingPoint() &&
- (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
+ (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM || isFMAXIMUMFMINIMUM ||
+ isFMAXIMUMNUMFMINIMUMNUM))) &&
!ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
CCL != ISD::SETTRUE &&
@@ -6496,7 +6512,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
} else if (OpVT.isFloatingPoint())
NewOpcode =
getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
- DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
+ DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM,
+ isFMAXIMUMFMINIMUM, isFMAXIMUMNUMFMINIMUMNUM);
if (NewOpcode != ISD::DELETED_NODE) {
SDValue MinMaxValue =
diff --git a/llvm/test/CodeGen/AArch64/fmaxmin-combine.ll b/llvm/test/CodeGen/AArch64/fmaxmin-combine.ll
new file mode 100644
index 0000000000000..671f47a46dc8a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fmaxmin-combine.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=AARCH64
+
+define i1 @f_olt_or(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_olt_or:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w8, mi
+; AARCH64-NEXT: fcmp s1, s2
+; AARCH64-NEXT: csinc w0, w8, wzr, pl
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_or_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_olt_or_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, mi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_or_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_olt_or_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, mi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_and(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ugt_and:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fcmp s1, s2
+; AARCH64-NEXT: fccmp s0, s2, #0, hi
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_and_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ugt_and_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_and_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ugt_and_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fminnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_and(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_olt_and:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, mi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_and_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_olt_and_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, mi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_and_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_olt_and_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, mi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_or(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ugt_or:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_or_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ugt_or_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ugt_or_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ugt_or_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmax s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, hi
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ugt float %a, %c
+ %cmp1 = fcmp ugt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_or(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ogt_or:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w8, gt
+; AARCH64-NEXT: fcmp s1, s2
+; AARCH64-NEXT: csinc w0, w8, wzr, le
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_or_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ogt_or_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, gt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_or_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ogt_or_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, gt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_and(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ult_and:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fcmp s1, s2
+; AARCH64-NEXT: fccmp s0, s2, #0, lt
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_and_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ult_and_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_and_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ult_and_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmaxnm s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_and(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ogt_and:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, gt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_and_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ogt_and_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, gt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ogt_and_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ogt_and_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, gt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ogt float %a, %c
+ %cmp1 = fcmp ogt float %b, %c
+ %0 = and i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_or(float %a, float %b, float %c) {
+; AARCH64-LABEL: f_ult_or:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_or_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; AARCH64-LABEL: f_ult_or_nan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_ult_or_snan(float nofpclass(snan) %a, float nofpclass(snan) %b, float nofpclass(snan) %c) {
+; AARCH64-LABEL: f_ult_or_snan:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fmin s0, s0, s1
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w0, lt
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp ult float %a, %c
+ %cmp1 = fcmp ult float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
diff --git a/llvm/test/CodeGen/RISCV/fmaxmin-combine.ll b/llvm/test/CodeGen/RISCV/fmaxmin-combine.ll
new file mode 100644
index 0000000000000..1da45ac0f2734
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fmaxmin-combine.ll
@@ -0,0 +1,742 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=riscv64 --mattr=+f,+zfa < %s | FileCheck %s --check-prefix=RISCV64
+; RUN: llc --mtriple=riscv64 --mattr=+f,-zfa < %s | FileCheck %s --check-prefix=RISCV64-NOZFA
+; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=AARCH64
+
+define i1 @f_olt_or(float %a, float %b, float %c) {
+; RISCV64-LABEL: f_olt_or:
+; RISCV64: # %bb.0: # %entry
+; RISCV64-NEXT: fmin.s fa5, fa0, fa1
+; RISCV64-NEXT: flt.s a0, fa5, fa2
+; RISCV64-NEXT: ret
+;
+; RISCV64-NOZFA-LABEL: f_olt_or:
+; RISCV64-NOZFA: # %bb.0: # %entry
+; RISCV64-NOZFA-NEXT: fmin.s fa5, fa0, fa1
+; RISCV64-NOZFA-NEXT: flt.s a0, fa5, fa2
+; RISCV64-NOZFA-NEXT: ret
+;
+; AARCH64-LABEL: f_olt_or:
+; AARCH64: // %bb.0: // %entry
+; AARCH64-NEXT: fcmp s0, s2
+; AARCH64-NEXT: cset w8, mi
+; AARCH64-NEXT: fcmp s1, s2
+; AARCH64-NEXT: csinc w0, w8, wzr, pl
+; AARCH64-NEXT: ret
+entry:
+ %cmp = fcmp olt float %a, %c
+ %cmp1 = fcmp olt float %b, %c
+ %0 = or i1 %cmp, %cmp1
+ ret i1 %0
+}
+
+define i1 @f_olt_or_nan(float nofpclass(nan) %a, float nofpclass(nan) %b, float nofpclass(nan) %c) {
+; RISCV64-LABEL: f_olt_or_nan:
+; RISCV64: # %bb.0: # %entry
+; RISCV64-NEXT: fmin.s fa5, fa0, fa1
+; RISCV64-NEXT: flt.s a0, fa5, fa2
+; RISCV64-NEXT: ret
+;
+; RISCV64-NOZFA-LABEL: f_olt_or_nan:
+; RISCV64-NOZFA: # %bb.0: # %entry
+; RISCV64-NOZFA-NEXT: fmin.s fa5, fa0, fa1
+; RISCV64-NOZFA-NEXT: flt.s a0, fa5, fa2
+; RISCV64-NOZFA-NEXT: ret
+;
+; AARCH64-LABEL: f_olt_or_nan:
+; AARCH64: // %bb.0: // %entry
+; AAR...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would be easier if we completed the migration away from the IEEE opcodes first
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need some more backend work to migrate away from the IEEE opcodes. I'd prefer to do them in parallel.
Once we support FMAXNUM/FMINNUM completely, it will be quite easy to remove the IEEE opcodes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My plan for this migration is:
- Support both the IEEE and non-IEEE opcodes in possible backends.
- Support both the IEEE and non-IEEE opcodes in SelectionDAG methods.
- Remove all references to the IEEE opcodes in all backends.
- Remove the definitions of, and all references to, the IEEE opcodes in the common code.
So that we can be sure that any commit of our git repo is in good shape.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we are not very careful about it, we may introduce new internal compiler errors.
Some architectures, like RISC-V, support the new fmax/fmin instructions introduced by IEEE 754-2019. We can use them in `getMinMaxOpcodeForFP`.
Some architectures, like RISC-V, support the new fmax/fmin instructions introduced by IEEE 754-2019.
We can use them in
getMinMaxOpcodeForFP.