-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[RISCV][GISel] Use libcalls for f32/f64 G_FCMP without F/D extensions. #117660
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
LegalizerHelper only supported f128 libcalls and incorrectly assumed that the destination register for the G_FCMP was s32.
|
@llvm/pr-subscribers-llvm-globalisel Author: Craig Topper (topperc) Changes: LegalizerHelper only supported f128 libcalls and incorrectly assumed that the destination register for the G_FCMP was s32. Patch is 42.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117660.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 321760ef822bc2..29c7d805ea81e2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1001,23 +1001,36 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
-getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
+getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
+#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return {RTLIB::LibcallPrefix##32, ICmpPred}; \
+ case 64: \
+ return {RTLIB::LibcallPrefix##64, ICmpPred}; \
+ case 128: \
+ return {RTLIB::LibcallPrefix##128, ICmpPred}; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
switch (Pred) {
case CmpInst::FCMP_OEQ:
- return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
+ RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
case CmpInst::FCMP_UNE:
- return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
case CmpInst::FCMP_OGE:
- return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
+ RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
case CmpInst::FCMP_OLT:
- return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
+ RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
case CmpInst::FCMP_OLE:
- return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
+ RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
case CmpInst::FCMP_OGT:
- return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
+ RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
case CmpInst::FCMP_UNO:
- return {RTLIB::UO_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
default:
return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
}
@@ -1032,21 +1045,24 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
const GFCmp *Cmp = cast<GFCmp>(&MI);
LLT OpLLT = MRI.getType(Cmp->getLHSReg());
- if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Cmp->getRHSReg()))
+ unsigned Size = OpLLT.getSizeInBits();
+ if ((Size != 32 && Size != 64 && Size != 128) ||
+ OpLLT != MRI.getType(Cmp->getRHSReg()))
return UnableToLegalize;
Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
// DstReg type is s32
const Register DstReg = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(DstReg);
const auto Cond = Cmp->getCond();
// Reference:
// https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
// Generates a libcall followed by ICMP.
- const auto BuildLibcall =
- [&](const RTLIB::Libcall Libcall, const CmpInst::Predicate ICmpPred,
- const DstOp &Res = LLT::scalar(32)) -> Register {
+ const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
+ const CmpInst::Predicate ICmpPred,
+ const DstOp &Res) -> Register {
// FCMP libcall always returns an i32, and needs an ICMP with #0.
constexpr LLT TempLLT = LLT::scalar(32);
Register Temp = MRI.createGenericVirtualRegister(TempLLT);
@@ -1065,7 +1081,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
};
// Simple case if we have a direct mapping from predicate to libcall
- if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond);
+ if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
Libcall != RTLIB::UNKNOWN_LIBCALL &&
ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
@@ -1081,11 +1097,13 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// FCMP_UEQ: unordered or equal
// Convert into (FCMP_OEQ || FCMP_UNO).
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
- const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
+ const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
- const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
+ const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
if (Oeq && Uno)
MIRBuilder.buildOr(DstReg, Oeq, Uno);
else
@@ -1100,13 +1118,15 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// We inverse the predicate instead of generating a NOT
// to save one instruction.
// On AArch64 isel can even select two cmp into a single ccmp.
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
const auto NotOeq =
- BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
+ BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
const auto NotUno =
- BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
+ BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
if (NotOeq && NotUno)
MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
@@ -1128,7 +1148,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
// Op1, Op2));
const auto [InversedLibcall, InversedPred] =
- getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond));
+ getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
if (!BuildLibcall(InversedLibcall,
CmpInst::getInversePredicate(InversedPred), DstReg))
return UnableToLegalize;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index a9294e76f8763f..e7166fdab8c268 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -530,7 +530,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.legalFor(ST.hasStdExtF(), {{sXLen, s32}})
.legalFor(ST.hasStdExtD(), {{sXLen, s64}})
.legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
- .clampScalar(ST.hasStdExtF(), 0, sXLen, sXLen);
+ .clampScalar(0, sXLen, sXLen)
+ .libcallFor({{sXLen, s32}, {sXLen, s64}});
// TODO: Support vector version of G_IS_FPCLASS.
getActionDefinitionsBuilder(G_IS_FPCLASS)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
new file mode 100644
index 00000000000000..ce3bd794477302
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
@@ -0,0 +1,584 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+
+define i32 @fcmp_false(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_false:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: li a0, 0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_false:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_false:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 0
+; RV64I-NEXT: ret
+ %1 = fcmp false double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oeq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oeq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oeq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oeq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oeq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ogt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ogt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ogt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gtdf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ogt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gtdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ogt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oge(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oge:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oge:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gedf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oge:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gedf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oge double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_olt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_olt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_olt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ltdf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_olt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ltdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp olt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ole(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ole:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ole:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ole:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ole double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_one(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_one:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_one:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: snez s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_one:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: and a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp one double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ord(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ord:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa0
+; CHECKIFD-NEXT: feq.d a1, fa1, fa1
+; CHECKIFD-NEXT: and a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ord:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ord:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ord double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ueq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ueq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ueq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: or a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ueq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: or a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp ueq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ugt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ugt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ugt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ugt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-N...
[truncated]
|
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: LegalizerHelper only supported f128 libcalls and incorrectly assumed that the destination register for the G_FCMP was s32. Patch is 42.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117660.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 321760ef822bc2..29c7d805ea81e2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1001,23 +1001,36 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
/// the ICMP predicate that should be generated to compare with #0
/// after the libcall.
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
-getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
+getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
+#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return {RTLIB::LibcallPrefix##32, ICmpPred}; \
+ case 64: \
+ return {RTLIB::LibcallPrefix##64, ICmpPred}; \
+ case 128: \
+ return {RTLIB::LibcallPrefix##128, ICmpPred}; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
switch (Pred) {
case CmpInst::FCMP_OEQ:
- return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
+ RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
case CmpInst::FCMP_UNE:
- return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
case CmpInst::FCMP_OGE:
- return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
+ RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
case CmpInst::FCMP_OLT:
- return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
+ RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
case CmpInst::FCMP_OLE:
- return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
+ RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
case CmpInst::FCMP_OGT:
- return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
+ RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
case CmpInst::FCMP_UNO:
- return {RTLIB::UO_F128, CmpInst::ICMP_NE};
+ RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
default:
return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
}
@@ -1032,21 +1045,24 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
const GFCmp *Cmp = cast<GFCmp>(&MI);
LLT OpLLT = MRI.getType(Cmp->getLHSReg());
- if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Cmp->getRHSReg()))
+ unsigned Size = OpLLT.getSizeInBits();
+ if ((Size != 32 && Size != 64 && Size != 128) ||
+ OpLLT != MRI.getType(Cmp->getRHSReg()))
return UnableToLegalize;
Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
// DstReg type is s32
const Register DstReg = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(DstReg);
const auto Cond = Cmp->getCond();
// Reference:
// https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
// Generates a libcall followed by ICMP.
- const auto BuildLibcall =
- [&](const RTLIB::Libcall Libcall, const CmpInst::Predicate ICmpPred,
- const DstOp &Res = LLT::scalar(32)) -> Register {
+ const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
+ const CmpInst::Predicate ICmpPred,
+ const DstOp &Res) -> Register {
// FCMP libcall always returns an i32, and needs an ICMP with #0.
constexpr LLT TempLLT = LLT::scalar(32);
Register Temp = MRI.createGenericVirtualRegister(TempLLT);
@@ -1065,7 +1081,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
};
// Simple case if we have a direct mapping from predicate to libcall
- if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond);
+ if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
Libcall != RTLIB::UNKNOWN_LIBCALL &&
ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
@@ -1081,11 +1097,13 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// FCMP_UEQ: unordered or equal
// Convert into (FCMP_OEQ || FCMP_UNO).
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
- const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
+ const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
- const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
+ const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
if (Oeq && Uno)
MIRBuilder.buildOr(DstReg, Oeq, Uno);
else
@@ -1100,13 +1118,15 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// We inverse the predicate instead of generating a NOT
// to save one instruction.
// On AArch64 isel can even select two cmp into a single ccmp.
- const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+ const auto [OeqLibcall, OeqPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
const auto NotOeq =
- BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
+ BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
- const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+ const auto [UnoLibcall, UnoPred] =
+ getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
const auto NotUno =
- BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
+ BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
if (NotOeq && NotUno)
MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
@@ -1128,7 +1148,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
// MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
// Op1, Op2));
const auto [InversedLibcall, InversedPred] =
- getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond));
+ getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
if (!BuildLibcall(InversedLibcall,
CmpInst::getInversePredicate(InversedPred), DstReg))
return UnableToLegalize;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index a9294e76f8763f..e7166fdab8c268 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -530,7 +530,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.legalFor(ST.hasStdExtF(), {{sXLen, s32}})
.legalFor(ST.hasStdExtD(), {{sXLen, s64}})
.legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
- .clampScalar(ST.hasStdExtF(), 0, sXLen, sXLen);
+ .clampScalar(0, sXLen, sXLen)
+ .libcallFor({{sXLen, s32}, {sXLen, s64}});
// TODO: Support vector version of G_IS_FPCLASS.
getActionDefinitionsBuilder(G_IS_FPCLASS)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
new file mode 100644
index 00000000000000..ce3bd794477302
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
@@ -0,0 +1,584 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d -verify-machineinstrs < %s \
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+
+define i32 @fcmp_false(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_false:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: li a0, 0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_false:
+; RV32I: # %bb.0:
+; RV32I-NEXT: li a0, 0
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_false:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a0, 0
+; RV64I-NEXT: ret
+ %1 = fcmp false double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oeq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oeq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oeq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oeq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oeq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ogt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ogt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ogt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gtdf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ogt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gtdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ogt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_oge(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oge:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa1, fa0
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_oge:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __gedf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_oge:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __gedf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp oge double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_olt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_olt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_olt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ltdf2
+; RV32I-NEXT: slti a0, a0, 0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_olt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ltdf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp olt double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ole(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ole:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ole:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ole:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: xori a0, a0, 1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ole double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_one(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_one:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_one:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: snez s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: and a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_one:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: and a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp one double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ord(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ord:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: feq.d a0, fa0, fa0
+; CHECKIFD-NEXT: feq.d a1, fa1, fa1
+; CHECKIFD-NEXT: and a0, a0, a1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ord:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ord:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %1 = fcmp ord double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ueq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ueq:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: flt.d a0, fa0, fa1
+; CHECKIFD-NEXT: flt.d a1, fa1, fa0
+; CHECKIFD-NEXT: or a0, a0, a1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ueq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s2, a2
+; RV32I-NEXT: mv s3, a3
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a2, s2
+; RV32I-NEXT: mv a3, s3
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: or a0, s4, a0
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ueq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -32
+; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: call __eqdf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: seqz s2, a0
+; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call __unorddf2
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: or a0, s2, a0
+; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: ret
+ %1 = fcmp ueq double %a, %b
+ %2 = zext i1 %1 to i32
+ ret i32 %2
+}
+
+define i32 @fcmp_ugt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ugt:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fle.d a0, fa0, fa1
+; CHECKIFD-NEXT: xori a0, a0, 1
+; CHECKIFD-NEXT: ret
+;
+; RV32I-LABEL: fcmp_ugt:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call __ledf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fcmp_ugt:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call __ledf2
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-N...
[truncated]
|
| ; RV64I-NEXT: addi sp, sp, -16 | ||
| ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill | ||
| ; RV64I-NEXT: call __eqdf2 | ||
| ; RV64I-NEXT: slli a0, a0, 32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need to figure out how to get rid of this shift pair.
| ; RV64I-NEXT: addi sp, sp, -16 | ||
| ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill | ||
| ; RV64I-NEXT: call __gtdf2 | ||
| ; RV64I-NEXT: sext.w a0, a0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to figure out how to get rid of this sext.w
| ; RV64I: # %bb.0: | ||
| ; RV64I-NEXT: li a0, 0 | ||
| ; RV64I-NEXT: ret | ||
| %1 = fcmp false double %a, %b |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Named values
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was copied from an existing SelectionDAG test. I'd prefer to keep them in sync for easier comparison.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should just merge the tests with multiple run lines then?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eventually I probably will. This is the first FP IR test I was able to completely copy and get it to pass. I've copied other tests, but had to delete an individual test case for one reason or another that I'll need to come back to.
LegalizerHelper only supported f128 libcalls and incorrectly assumed that the destination register for the G_FCMP was s32.