Skip to content

Commit 5b20597

Browse files
committed
[SDAG][NVPTX] Add TLI check for preferring custom FP_TO_SINT operations to FP_TO_UINT
1 parent 6b00ae6 commit 5b20597

File tree

4 files changed

+164
-5
lines changed

4 files changed

+164
-5
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3464,6 +3464,12 @@ class TargetLoweringBase {
34643464
return false;
34653465
}
34663466

3467+
/// Return true if, when promoting the integer result of an FP-to-unsigned
3468+
/// conversion, FP_TO_SINT should be used instead of FP_TO_UINT when both are Custom for the promoted type. Defaults to true.
3469+
virtual bool preferPromoteFPToCustomSINTOverCustomUINT() const {
3470+
return true;
3471+
}
3472+
34673473
/// Create the IR node for the given complex deinterleaving operation.
34683474
/// If one cannot be created using all the given inputs, nullptr should be
34693475
/// returned.

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -853,21 +853,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
853853

854854
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
855855
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
856-
// and SINT conversions are Custom, there is no way to tell which is
857-
// preferable. We choose SINT because that's the right thing on PPC.)
856+
// and SINT conversions are Custom, we use a TLI call to check which is
857+
// preferable.)
858858
if (N->getOpcode() == ISD::FP_TO_UINT &&
859859
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
860-
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
860+
(TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
861+
(!TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
862+
TLI.isOperationCustom(ISD::FP_TO_SINT, NVT)) ||
863+
(TLI.isOperationCustom(ISD::FP_TO_SINT, NVT) &&
864+
TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
865+
TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
861866
NewOpc = ISD::FP_TO_SINT;
862867

863868
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
864869
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
865-
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
870+
(TLI.isOperationLegal(ISD::STRICT_FP_TO_SINT, NVT) ||
871+
(!TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
872+
TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT)) ||
873+
(TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT) &&
874+
TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
875+
TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
866876
NewOpc = ISD::STRICT_FP_TO_SINT;
867877

868878
if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
869879
!TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
870-
TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
880+
(TLI.isOperationLegal(ISD::VP_FP_TO_SINT, NVT) ||
881+
(!TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
882+
TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT)) ||
883+
(TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT) &&
884+
TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
885+
TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
871886
NewOpc = ISD::VP_FP_TO_SINT;
872887

873888
SDValue Res;

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ class NVPTXTargetLowering : public TargetLowering {
282282
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
283283
AtomicOrdering Ord) const override;
284284

285+
// Returning false makes the type legalizer keep FP_TO_UINT (rather than
// switching to FP_TO_SINT) when both are Custom for the promoted type.
bool preferPromoteFPToCustomSINTOverCustomUINT() const override {
286+
return false;
287+
}
288+
285289
private:
286290
const NVPTXSubtarget &STI; // cache the subtarget here
287291
mutable unsigned GlobalUniqueCallSite;
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
3+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s
4+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
5+
6+
define i8 @cvt_u8_f32(float %x) {
7+
; CHECK-LABEL: cvt_u8_f32(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b16 %rs<2>;
10+
; CHECK-NEXT: .reg .b32 %r<2>;
11+
; CHECK-NEXT: .reg .f32 %f<2>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0:
14+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_u8_f32_param_0];
15+
; CHECK-NEXT: cvt.rzi.u16.f32 %rs1, %f1;
16+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
17+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
18+
; CHECK-NEXT: ret;
19+
%a = fptoui float %x to i8
20+
ret i8 %a
21+
}
22+
23+
define i8 @cvt_u8_f64(double %x) {
24+
; CHECK-LABEL: cvt_u8_f64(
25+
; CHECK: {
26+
; CHECK-NEXT: .reg .b16 %rs<2>;
27+
; CHECK-NEXT: .reg .b32 %r<2>;
28+
; CHECK-NEXT: .reg .f64 %fd<2>;
29+
; CHECK-EMPTY:
30+
; CHECK-NEXT: // %bb.0:
31+
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_u8_f64_param_0];
32+
; CHECK-NEXT: cvt.rzi.u16.f64 %rs1, %fd1;
33+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
34+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
35+
; CHECK-NEXT: ret;
36+
%a = fptoui double %x to i8
37+
ret i8 %a
38+
}
39+
40+
define float @cvt_f32_i8(i8 %x) {
41+
; CHECK-LABEL: cvt_f32_i8(
42+
; CHECK: {
43+
; CHECK-NEXT: .reg .b16 %rs<2>;
44+
; CHECK-NEXT: .reg .f32 %f<2>;
45+
; CHECK-EMPTY:
46+
; CHECK-NEXT: // %bb.0:
47+
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f32_i8_param_0];
48+
; CHECK-NEXT: cvt.rn.f32.u16 %f1, %rs1;
49+
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
50+
; CHECK-NEXT: ret;
51+
%a = uitofp i8 %x to float
52+
ret float %a
53+
}
54+
55+
define double @cvt_f64_i8(i8 %x) {
56+
; CHECK-LABEL: cvt_f64_i8(
57+
; CHECK: {
58+
; CHECK-NEXT: .reg .b16 %rs<2>;
59+
; CHECK-NEXT: .reg .f64 %fd<2>;
60+
; CHECK-EMPTY:
61+
; CHECK-NEXT: // %bb.0:
62+
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f64_i8_param_0];
63+
; CHECK-NEXT: cvt.rn.f64.u16 %fd1, %rs1;
64+
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
65+
; CHECK-NEXT: ret;
66+
%a = uitofp i8 %x to double
67+
ret double %a
68+
}
69+
70+
define float @cvt_f32_s8(i8 %x) {
71+
; CHECK-LABEL: cvt_f32_s8(
72+
; CHECK: {
73+
; CHECK-NEXT: .reg .b16 %rs<2>;
74+
; CHECK-NEXT: .reg .f32 %f<2>;
75+
; CHECK-EMPTY:
76+
; CHECK-NEXT: // %bb.0:
77+
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f32_s8_param_0];
78+
; CHECK-NEXT: cvt.rn.f32.s16 %f1, %rs1;
79+
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
80+
; CHECK-NEXT: ret;
81+
%a = sitofp i8 %x to float
82+
ret float %a
83+
}
84+
85+
define double @cvt_f64_s8(i8 %x) {
86+
; CHECK-LABEL: cvt_f64_s8(
87+
; CHECK: {
88+
; CHECK-NEXT: .reg .b16 %rs<2>;
89+
; CHECK-NEXT: .reg .f64 %fd<2>;
90+
; CHECK-EMPTY:
91+
; CHECK-NEXT: // %bb.0:
92+
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f64_s8_param_0];
93+
; CHECK-NEXT: cvt.rn.f64.s16 %fd1, %rs1;
94+
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
95+
; CHECK-NEXT: ret;
96+
%a = sitofp i8 %x to double
97+
ret double %a
98+
}
99+
100+
define i8 @cvt_s8_f32(float %x) {
101+
; CHECK-LABEL: cvt_s8_f32(
102+
; CHECK: {
103+
; CHECK-NEXT: .reg .b16 %rs<2>;
104+
; CHECK-NEXT: .reg .b32 %r<3>;
105+
; CHECK-NEXT: .reg .f32 %f<2>;
106+
; CHECK-EMPTY:
107+
; CHECK-NEXT: // %bb.0:
108+
; CHECK-NEXT: ld.param.f32 %f1, [cvt_s8_f32_param_0];
109+
; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %f1;
110+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
111+
; CHECK-NEXT: and.b32 %r2, %r1, 255;
112+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
113+
; CHECK-NEXT: ret;
114+
%a = fptosi float %x to i8
115+
ret i8 %a
116+
}
117+
118+
define i8 @cvt_s8_f64(double %x) {
119+
; CHECK-LABEL: cvt_s8_f64(
120+
; CHECK: {
121+
; CHECK-NEXT: .reg .b16 %rs<2>;
122+
; CHECK-NEXT: .reg .b32 %r<3>;
123+
; CHECK-NEXT: .reg .f64 %fd<2>;
124+
; CHECK-EMPTY:
125+
; CHECK-NEXT: // %bb.0:
126+
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_s8_f64_param_0];
127+
; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %fd1;
128+
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
129+
; CHECK-NEXT: and.b32 %r2, %r1, 255;
130+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
131+
; CHECK-NEXT: ret;
132+
%a = fptosi double %x to i8
133+
ret i8 %a
134+
}

0 commit comments

Comments
 (0)