diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 321760ef822bc..29c7d805ea81e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1001,23 +1001,36 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
 /// the ICMP predicate that should be generated to compare with #0
 /// after the libcall.
 static std::pair<RTLIB::Libcall, CmpInst::Predicate>
-getFCMPLibcallDesc(const CmpInst::Predicate Pred) {
+getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
+#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                \
+  do {                                                                        \
+    switch (Size) {                                                           \
+    case 32:                                                                  \
+      return {RTLIB::LibcallPrefix##32, ICmpPred};                            \
+    case 64:                                                                  \
+      return {RTLIB::LibcallPrefix##64, ICmpPred};                            \
+    case 128:                                                                 \
+      return {RTLIB::LibcallPrefix##128, ICmpPred};                           \
+    default:                                                                  \
+      llvm_unreachable("unexpected size");                                    \
+    }                                                                         \
+  } while (0)
   switch (Pred) {
   case CmpInst::FCMP_OEQ:
-    return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ};
+    RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
   case CmpInst::FCMP_UNE:
-    return {RTLIB::UNE_F128, CmpInst::ICMP_NE};
+    RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
   case CmpInst::FCMP_OGE:
-    return {RTLIB::OGE_F128, CmpInst::ICMP_SGE};
+    RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
   case CmpInst::FCMP_OLT:
-    return {RTLIB::OLT_F128, CmpInst::ICMP_SLT};
+    RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
   case CmpInst::FCMP_OLE:
-    return {RTLIB::OLE_F128, CmpInst::ICMP_SLE};
+    RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
   case CmpInst::FCMP_OGT:
-    return {RTLIB::OGT_F128, CmpInst::ICMP_SGT};
+    RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
   case CmpInst::FCMP_UNO:
-    return {RTLIB::UO_F128, CmpInst::ICMP_NE};
+    RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
   default:
     return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
   }
@@ -1032,21 +1045,24 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
   const GFCmp *Cmp = cast<GFCmp>(&MI);
 
   LLT OpLLT = MRI.getType(Cmp->getLHSReg());
-  if (OpLLT != LLT::scalar(128) || OpLLT != MRI.getType(Cmp->getRHSReg()))
+  unsigned Size = OpLLT.getSizeInBits();
+  if ((Size != 32 && Size != 64 && Size != 128) ||
+      OpLLT != MRI.getType(Cmp->getRHSReg()))
     return UnableToLegalize;
 
   Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
 
   // DstReg type is s32
   const Register DstReg = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(DstReg);
   const auto Cond = Cmp->getCond();
 
   // Reference:
   // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
   // Generates a libcall followed by ICMP.
-  const auto BuildLibcall =
-      [&](const RTLIB::Libcall Libcall, const CmpInst::Predicate ICmpPred,
-          const DstOp &Res = LLT::scalar(32)) -> Register {
+  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
+                                const CmpInst::Predicate ICmpPred,
+                                const DstOp &Res) -> Register {
     // FCMP libcall always returns an i32, and needs an ICMP with #0.
     constexpr LLT TempLLT = LLT::scalar(32);
     Register Temp = MRI.createGenericVirtualRegister(TempLLT);
@@ -1065,7 +1081,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
   };
 
   // Simple case if we have a direct mapping from predicate to libcall
-  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond);
+  if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
       Libcall != RTLIB::UNKNOWN_LIBCALL &&
       ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
     if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
@@ -1081,11 +1097,13 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
 
     // FCMP_UEQ: unordered or equal
     // Convert into (FCMP_OEQ || FCMP_UNO).
-    const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
-    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred);
+    const auto [OeqLibcall, OeqPred] =
+        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
+    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
 
-    const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
-    const auto Uno = BuildLibcall(UnoLibcall, UnoPred);
+    const auto [UnoLibcall, UnoPred] =
+        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
+    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
     if (Oeq && Uno)
       MIRBuilder.buildOr(DstReg, Oeq, Uno);
     else
@@ -1100,13 +1118,15 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
     // We inverse the predicate instead of generating a NOT
    // to save one instruction.
     // On AArch64 isel can even select two cmp into a single ccmp.
-    const auto [OeqLibcall, OeqPred] = getFCMPLibcallDesc(CmpInst::FCMP_OEQ);
+    const auto [OeqLibcall, OeqPred] =
+        getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
     const auto NotOeq =
-        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred));
+        BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
 
-    const auto [UnoLibcall, UnoPred] = getFCMPLibcallDesc(CmpInst::FCMP_UNO);
+    const auto [UnoLibcall, UnoPred] =
+        getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
     const auto NotUno =
-        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred));
+        BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
 
     if (NotOeq && NotUno)
       MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
@@ -1128,7 +1148,7 @@ LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
     //   MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
     //                        Op1, Op2));
     const auto [InversedLibcall, InversedPred] =
-        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond));
+        getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
     if (!BuildLibcall(InversedLibcall,
                       CmpInst::getInversePredicate(InversedPred), DstReg))
       return UnableToLegalize;
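For reference, RTLIBCASE_CMP is a plain size dispatch over the RTLIB enumerators. A sketch of what RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ) expands to for the FCMP_OEQ case, shown for illustration only (not part of the patch); the soft-float routine names in the comments follow the GCC document cited in the code:

// Illustrative expansion of RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
do {
  switch (Size) {
  case 32:
    return {RTLIB::OEQ_F32, CmpInst::ICMP_EQ};  // lowers to __eqsf2
  case 64:
    return {RTLIB::OEQ_F64, CmpInst::ICMP_EQ};  // lowers to __eqdf2
  case 128:
    return {RTLIB::OEQ_F128, CmpInst::ICMP_EQ}; // lowers to __eqtf2
  default:
    llvm_unreachable("unexpected size");
  }
} while (0);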
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index a9294e76f8763..e7166fdab8c26 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -530,7 +530,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       .legalFor(ST.hasStdExtF(), {{sXLen, s32}})
       .legalFor(ST.hasStdExtD(), {{sXLen, s64}})
       .legalFor(ST.hasStdExtZfh(), {{sXLen, s16}})
-      .clampScalar(ST.hasStdExtF(), 0, sXLen, sXLen);
+      .clampScalar(0, sXLen, sXLen)
+      .libcallFor({{sXLen, s32}, {sXLen, s64}});
 
   // TODO: Support vector version of G_IS_FPCLASS.
   getActionDefinitionsBuilder(G_IS_FPCLASS)
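The tests below exercise exactly this libcall path. As a sketch of the emitted shape for the simple direct-mapping case (assuming the GCC soft-float return conventions linked in the code, where __eqdf2 returns zero iff the operands are ordered and equal), an FCMP_OEQ on double becomes one call plus an integer compare against #0:

// Illustrative C, not generated code: the lowered form the RV32I/RV64I
// checks verify for fcmp_oeq.
extern "C" int __eqdf2(double, double); // soft-float comparison routine
int fcmp_oeq_lowered(double a, double b) {
  return __eqdf2(a, b) == 0; // ICMP_EQ against #0, per getFCMPLibcallDesc
}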
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
new file mode 100644
index 0000000000000..7ef1af22370a1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-fcmp.ll
@@ -0,0 +1,599 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d < %s \
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d < %s \
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=CHECKIFD %s
+; RUN: llc -mtriple=riscv32 -global-isel < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+define i32 @fcmp_false(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_false:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    li a0, 0
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_false:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_false:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    ret
+  %1 = fcmp false double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_oeq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oeq:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    feq.d a0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_oeq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __eqdf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_oeq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __eqdf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp oeq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ogt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ogt:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa1, fa0
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ogt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gtdf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ogt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gtdf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ogt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_oge(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_oge:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fle.d a0, fa1, fa0
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_oge:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gedf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_oge:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gedf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp oge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_olt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_olt:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_olt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ltdf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_olt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ltdf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp olt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+; FIXME: sgtz+xori can be slti a0, a0, 1
+define i32 @fcmp_ole(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ole:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fle.d a0, fa0, fa1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ole:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ledf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ole:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ledf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ole double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_one(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_one:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa0, fa1
+; CHECKIFD-NEXT:    flt.d a1, fa1, fa0
+; CHECKIFD-NEXT:    or a0, a0, a1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_one:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s3, a3
+; RV32I-NEXT:    call __eqdf2
+; RV32I-NEXT:    snez s4, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    call __unorddf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    and a0, s4, a0
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_one:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    call __eqdf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __unorddf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    and a0, s2, a0
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %1 = fcmp one double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_ord(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ord:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    feq.d a0, fa0, fa0
+; CHECKIFD-NEXT:    feq.d a1, fa1, fa1
+; CHECKIFD-NEXT:    and a0, a0, a1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ord:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __unorddf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ord:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __unorddf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ord double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_ueq(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ueq:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa0, fa1
+; CHECKIFD-NEXT:    flt.d a1, fa1, fa0
+; CHECKIFD-NEXT:    or a0, a0, a1
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ueq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s3, a3
+; RV32I-NEXT:    call __eqdf2
+; RV32I-NEXT:    seqz s4, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    call __unorddf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    or a0, s4, a0
+; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ueq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    call __eqdf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __unorddf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    or a0, s2, a0
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %1 = fcmp ueq double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ugt(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ugt:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fle.d a0, fa0, fa1
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ugt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ledf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ugt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ledf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ugt double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_uge(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_uge:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa0, fa1
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_uge:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ltdf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_uge:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ltdf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp uge double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ult(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ult:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    fle.d a0, fa1, fa0
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ult:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gedf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ult:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gedf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ult double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+; FIXME: sgtz+xori can be slti a0, a0, 1
+define i32 @fcmp_ule(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_ule:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    flt.d a0, fa1, fa0
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ule:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gtdf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ule:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gtdf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ule double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_une(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_une:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    feq.d a0, fa0, fa1
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_une:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __nedf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_une:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __nedf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp une double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_uno(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_uno:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    feq.d a0, fa0, fa0
+; CHECKIFD-NEXT:    feq.d a1, fa1, fa1
+; CHECKIFD-NEXT:    and a0, a0, a1
+; CHECKIFD-NEXT:    xori a0, a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_uno:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __unorddf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_uno:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __unorddf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp uno double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_true(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: fcmp_true:
+; CHECKIFD:       # %bb.0:
+; CHECKIFD-NEXT:    li a0, 1
+; CHECKIFD-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_true:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_true:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    ret
+  %1 = fcmp true double %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
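fcmp_ueq has no single soft-float routine, which is why the fcmp_ueq tests (above for double, below for float) emit two libcalls. Per the "Convert into (FCMP_OEQ || FCMP_UNO)" comment in createFCMPLibcall, the lowering is equivalent to this sketch (illustrative C, assuming the GCC soft-float return conventions cited in the code):

extern "C" int __eqsf2(float, float);    // 0 iff ordered and equal
extern "C" int __unordsf2(float, float); // nonzero iff either operand is NaN
int fcmp_ueq_lowered(float a, float b) {
  // Matches the seqz + snez + or sequence in the RV32I/RV64I checks.
  return (__eqsf2(a, b) == 0) | (__unordsf2(a, b) != 0);
}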
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll
new file mode 100644
index 0000000000000..bdd779d476109
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-fcmp.ll
@@ -0,0 +1,584 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+define i32 @fcmp_false(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_false:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    li a0, 0
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_false:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_false:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 0
+; RV64I-NEXT:    ret
+  %1 = fcmp false float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_oeq(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_oeq:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    feq.s a0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_oeq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __eqsf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_oeq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __eqsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp oeq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ogt(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ogt:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa1, fa0
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ogt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gtsf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ogt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gtsf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ogt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_oge(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_oge:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fle.s a0, fa1, fa0
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_oge:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gesf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_oge:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gesf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp oge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_olt(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_olt:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_olt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ltsf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_olt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ltsf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp olt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+; FIXME: sgtz+xori can be slti a0, a0, 1
+define i32 @fcmp_ole(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ole:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fle.s a0, fa0, fa1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ole:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __lesf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ole:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __lesf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ole float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_one(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_one:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa0, fa1
+; CHECKIF-NEXT:    flt.s a1, fa1, fa0
+; CHECKIF-NEXT:    or a0, a0, a1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_one:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    call __eqsf2
+; RV32I-NEXT:    snez s2, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __unordsf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    and a0, s2, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_one:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    call __eqsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __unordsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    and a0, s2, a0
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %1 = fcmp one float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_ord(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ord:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    feq.s a0, fa0, fa0
+; CHECKIF-NEXT:    feq.s a1, fa1, fa1
+; CHECKIF-NEXT:    and a0, a0, a1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ord:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __unordsf2
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ord:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __unordsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ord float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_ueq(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ueq:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa0, fa1
+; CHECKIF-NEXT:    flt.s a1, fa1, fa0
+; CHECKIF-NEXT:    or a0, a0, a1
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ueq:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    call __eqsf2
+; RV32I-NEXT:    seqz s2, a0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    call __unordsf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    or a0, s2, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ueq:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    call __eqsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    seqz s2, a0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __unordsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    or a0, s2, a0
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+  %1 = fcmp ueq float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ugt(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ugt:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fle.s a0, fa0, fa1
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ugt:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __lesf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ugt:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __lesf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ugt float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_uge(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_uge:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa0, fa1
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_uge:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __ltsf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_uge:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __ltsf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp uge float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+define i32 @fcmp_ult(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ult:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    fle.s a0, fa1, fa0
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ult:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gesf2
+; RV32I-NEXT:    slti a0, a0, 0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ult:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gesf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    slti a0, a0, 0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ult float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: sext.w on RV64 is unnecessary
+; FIXME: sgtz+xori can be slti a0, a0, 1
+define i32 @fcmp_ule(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_ule:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    flt.s a0, fa1, fa0
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_ule:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __gtsf2
+; RV32I-NEXT:    sgtz a0, a0
+; RV32I-NEXT:    xori a0, a0, 1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_ule:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __gtsf2
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    xori a0, a0, 1
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp ule float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_une(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_une:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    feq.s a0, fa0, fa1
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_une:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __nesf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_une:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __nesf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp une float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+; FIXME: slli+srli on RV64 are unnecessary
+define i32 @fcmp_uno(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_uno:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    feq.s a0, fa0, fa0
+; CHECKIF-NEXT:    feq.s a1, fa1, fa1
+; CHECKIF-NEXT:    and a0, a0, a1
+; CHECKIF-NEXT:    xori a0, a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_uno:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call __unordsf2
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_uno:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call __unordsf2
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %1 = fcmp uno float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}
+
+define i32 @fcmp_true(float %a, float %b) nounwind {
+; CHECKIF-LABEL: fcmp_true:
+; CHECKIF:       # %bb.0:
+; CHECKIF-NEXT:    li a0, 1
+; CHECKIF-NEXT:    ret
+;
+; RV32I-LABEL: fcmp_true:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: fcmp_true:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a0, 1
+; RV64I-NEXT:    ret
+  %1 = fcmp true float %a, %b
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+}