diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 16066226f1896..4f2f202f94841 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3464,6 +3464,34 @@ class TargetLoweringBase { return false; } + // Get the preferred opcode for FP_TO_XINT nodes. + // By default, this checks if the provided operation is an illegal FP_TO_UINT + // and if so, checks if FP_TO_SINT is legal or custom for use as a + // replacement. If both UINT and SINT conversions are Custom, we choose SINT + // by default because that's the right thing on PPC. + virtual unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, + EVT ToVT) const { + if (isOperationLegal(Op, ToVT)) + return Op; + switch (Op) { + case ISD::FP_TO_UINT: + if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT)) + return ISD::FP_TO_SINT; + break; + case ISD::STRICT_FP_TO_UINT: + if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT)) + return ISD::STRICT_FP_TO_SINT; + break; + case ISD::VP_FP_TO_UINT: + if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT)) + return ISD::VP_FP_TO_SINT; + break; + default: + break; + } + return Op; + } + /// Create the IR node for the given complex deinterleaving operation. /// If one cannot be created using all the given inputs, nullptr should be /// returned. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f944104a0e9d6..4685330c5aa14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -849,28 +849,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned NewOpc = N->getOpcode(); + unsigned NewOpc = + TLI.getPreferredFPToIntOpcode(N->getOpcode(), N->getValueType(0), NVT); SDLoc dl(N); - // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is - // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT - // and SINT conversions are Custom, there is no way to tell which is - // preferable. We choose SINT because that's the right thing on PPC.) - if (N->getOpcode() == ISD::FP_TO_UINT && - !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && - TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) - NewOpc = ISD::FP_TO_SINT; - - if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && - !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && - TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) - NewOpc = ISD::STRICT_FP_TO_SINT; - - if (N->getOpcode() == ISD::VP_FP_TO_UINT && - !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) && - TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT)) - NewOpc = ISD::VP_FP_TO_SINT; - SDValue Res; if (N->isStrictFPOpcode()) { Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b566cdd4b6bfc..e3f685c38f297 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -6214,6 +6214,33 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder, return nullptr; } +// Rather than 
default to SINT when both UINT and SINT are custom, we only +// change the opcode when UINT is not legal and SINT is. UINT is preferred when +// both are custom since unsigned CVT instructions can lead to slightly better +// SASS code with fewer instructions. +unsigned NVPTXTargetLowering::getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, + EVT ToVT) const { + if (isOperationLegal(Op, ToVT)) + return Op; + switch (Op) { + case ISD::FP_TO_UINT: + if (isOperationLegal(ISD::FP_TO_SINT, ToVT)) + return ISD::FP_TO_SINT; + break; + case ISD::STRICT_FP_TO_UINT: + if (isOperationLegal(ISD::STRICT_FP_TO_SINT, ToVT)) + return ISD::STRICT_FP_TO_SINT; + break; + case ISD::VP_FP_TO_UINT: + if (isOperationLegal(ISD::VP_FP_TO_SINT, ToVT)) + return ISD::VP_FP_TO_SINT; + break; + default: + break; + } + return Op; +} + // Pin NVPTXTargetObjectFile's vtables to this file. NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 39470be254efa..dd90746f6d9d6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -282,6 +282,9 @@ class NVPTXTargetLowering : public TargetLowering { Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override; + unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT, + EVT ToVT) const override; + private: const NVPTXSubtarget &STI; // cache the subtarget here mutable unsigned GlobalUniqueCallSite; diff --git a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll new file mode 100644 index 0000000000000..93da39137afd8 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s +; RUN: 
llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s + +define i8 @cvt_u8_f32(float %x) { +; CHECK-LABEL: cvt_u8_f32( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [cvt_u8_f32_param_0]; +; CHECK-NEXT: cvt.rzi.u16.f32 %rs1, %f1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %a = fptoui float %x to i8 + ret i8 %a +} + +define i8 @cvt_u8_f64(double %x) { +; CHECK-LABEL: cvt_u8_f64( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f64 %fd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [cvt_u8_f64_param_0]; +; CHECK-NEXT: cvt.rzi.u16.f64 %rs1, %fd1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %a = fptoui double %x to i8 + ret i8 %a +} + +define float @cvt_f32_i8(i8 %x) { +; CHECK-LABEL: cvt_f32_i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f32_i8_param_0]; +; CHECK-NEXT: cvt.rn.f32.u16 %f1, %rs1; +; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ret; + %a = uitofp i8 %x to float + ret float %a +} + +define double @cvt_f64_i8(i8 %x) { +; CHECK-LABEL: cvt_f64_i8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .f64 %fd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f64_i8_param_0]; +; CHECK-NEXT: cvt.rn.f64.u16 %fd1, %rs1; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd1; +; CHECK-NEXT: ret; + %a = uitofp i8 %x to double + ret double %a +} + +define float @cvt_f32_s8(i8 %x) { +; CHECK-LABEL: cvt_f32_s8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, 
[cvt_f32_s8_param_0]; +; CHECK-NEXT: cvt.rn.f32.s16 %f1, %rs1; +; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ret; + %a = sitofp i8 %x to float + ret float %a +} + +define double @cvt_f64_s8(i8 %x) { +; CHECK-LABEL: cvt_f64_s8( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .f64 %fd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f64_s8_param_0]; +; CHECK-NEXT: cvt.rn.f64.s16 %fd1, %rs1; +; CHECK-NEXT: st.param.f64 [func_retval0], %fd1; +; CHECK-NEXT: ret; + %a = sitofp i8 %x to double + ret double %a +} + +define i8 @cvt_s8_f32(float %x) { +; CHECK-LABEL: cvt_s8_f32( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [cvt_s8_f32_param_0]; +; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %f1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: and.b32 %r2, %r1, 255; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %a = fptosi float %x to i8 + ret i8 %a +} + +define i8 @cvt_s8_f64(double %x) { +; CHECK-LABEL: cvt_s8_f64( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f64 %fd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f64 %fd1, [cvt_s8_f64_param_0]; +; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %fd1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: and.b32 %r2, %r1, 255; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %a = fptosi double %x to i8 + ret i8 %a +}