Skip to content

Commit f73e163

Browse files
authored
[DAGCombiner] Fold [us]itofp of truncate (llvm#149391)
1 parent 0e40695 commit f73e163

File tree

3 files changed

+140
-0
lines changed

3 files changed

+140
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18727,6 +18727,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
1872718727
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
1872818728
return FTrunc;
1872918729

18730+
// fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x)
18731+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() &&
18732+
TLI.isTypeDesirableForOp(ISD::SINT_TO_FP,
18733+
N0.getOperand(0).getValueType()))
18734+
return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0));
18735+
1873018736
return SDValue();
1873118737
}
1873218738

@@ -18764,6 +18770,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
1876418770
if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI))
1876518771
return FTrunc;
1876618772

18773+
// fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x)
18774+
if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() &&
18775+
TLI.isTypeDesirableForOp(ISD::UINT_TO_FP,
18776+
N0.getOperand(0).getValueType()))
18777+
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0));
18778+
1876718779
return SDValue();
1876818780
}
1876918781

llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2311,4 +2311,51 @@ entry:
23112311
ret void
23122312
}
23132313

2314+
; Unsigned <4 x i8> -> <4 x float> conversion. The expected output extracts
; each lane with prmt.b32 and converts it with cvt.rn.f32.u32; only 32-bit
; registers are declared, so no 16-bit intermediate is expected.
define <4 x float> @test_uitofp_v4i8(<4 x i8> %a) {
2315+
; CHECK-LABEL: test_uitofp_v4i8(
2316+
; CHECK: {
2317+
; CHECK-NEXT: .reg .b32 %r<10>;
2318+
; CHECK-EMPTY:
2319+
; CHECK-NEXT: // %bb.0:
2320+
; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_v4i8_param_0];
2321+
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U;
2322+
; CHECK-NEXT: cvt.rn.f32.u32 %r3, %r2;
2323+
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
2324+
; CHECK-NEXT: cvt.rn.f32.u32 %r5, %r4;
2325+
; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x7771U;
2326+
; CHECK-NEXT: cvt.rn.f32.u32 %r7, %r6;
2327+
; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x7770U;
2328+
; CHECK-NEXT: cvt.rn.f32.u32 %r9, %r8;
2329+
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
2330+
; CHECK-NEXT: ret;
2331+
%r = uitofp <4 x i8> %a to <4 x float>
2332+
ret <4 x float> %r
2333+
}
2334+
2335+
; Signed <4 x i8> -> <4 x float> conversion. Unlike the unsigned variant, the
; expected output still narrows each extracted lane to a 16-bit register
; (cvt.u16.u32) and converts from there with cvt.rn.f32.s16.
define <4 x float> @test_sitofp_v4i8(<4 x i8> %a) {
2336+
; CHECK-LABEL: test_sitofp_v4i8(
2337+
; CHECK: {
2338+
; CHECK-NEXT: .reg .b16 %rs<5>;
2339+
; CHECK-NEXT: .reg .b32 %r<10>;
2340+
; CHECK-EMPTY:
2341+
; CHECK-NEXT: // %bb.0:
2342+
; CHECK-NEXT: ld.param.b32 %r1, [test_sitofp_v4i8_param_0];
2343+
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0xbbb3U;
2344+
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
2345+
; CHECK-NEXT: cvt.rn.f32.s16 %r3, %rs1;
2346+
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U;
2347+
; CHECK-NEXT: cvt.u16.u32 %rs2, %r4;
2348+
; CHECK-NEXT: cvt.rn.f32.s16 %r5, %rs2;
2349+
; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U;
2350+
; CHECK-NEXT: cvt.u16.u32 %rs3, %r6;
2351+
; CHECK-NEXT: cvt.rn.f32.s16 %r7, %rs3;
2352+
; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x8880U;
2353+
; CHECK-NEXT: cvt.u16.u32 %rs4, %r8;
2354+
; CHECK-NEXT: cvt.rn.f32.s16 %r9, %rs4;
2355+
; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3};
2356+
; CHECK-NEXT: ret;
2357+
%r = sitofp <4 x i8> %a to <4 x float>
2358+
ret <4 x float> %r
2359+
}
2360+
23142361
attributes #0 = { nounwind }

llvm/test/CodeGen/NVPTX/trunc-tofp.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mcpu=sm_80 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -mcpu=sm_80 | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
; Positive test: uitofp of a `trunc nuw`. The flag on the truncate matches the
; signedness of the conversion, so the expected output converts the 32-bit sum
; directly (cvt.rn.f32.u32) with no 16-bit truncation step.
define float @uitofp_trunc_nuw(i32 %x, i32 %y) {
8+
; CHECK-LABEL: uitofp_trunc_nuw(
9+
; CHECK: {
10+
; CHECK-NEXT: .reg .b32 %r<5>;
11+
; CHECK-EMPTY:
12+
; CHECK-NEXT: // %bb.0:
13+
; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nuw_param_0];
14+
; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nuw_param_1];
15+
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
16+
; CHECK-NEXT: cvt.rn.f32.u32 %r4, %r3;
17+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
18+
; CHECK-NEXT: ret;
19+
%v = add i32 %x, %y
20+
%t = trunc nuw i32 %v to i16
21+
%f = uitofp i16 %t to float
22+
ret float %f
23+
}
24+
25+
; Positive test: sitofp of a `trunc nsw`. The flag on the truncate matches the
; signedness of the conversion, so the expected output converts the 32-bit sum
; directly (cvt.rn.f32.s32) with no 16-bit truncation step.
define float @sitofp_trunc_nsw(i32 %x, i32 %y) {
26+
; CHECK-LABEL: sitofp_trunc_nsw(
27+
; CHECK: {
28+
; CHECK-NEXT: .reg .b32 %r<5>;
29+
; CHECK-EMPTY:
30+
; CHECK-NEXT: // %bb.0:
31+
; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nsw_param_0];
32+
; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nsw_param_1];
33+
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
34+
; CHECK-NEXT: cvt.rn.f32.s32 %r4, %r3;
35+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
36+
; CHECK-NEXT: ret;
37+
%v = add i32 %x, %y
38+
%t = trunc nsw i32 %v to i16
39+
%f = sitofp i16 %t to float
40+
ret float %f
41+
}
42+
43+
; Negative test: the truncate carries only `nsw`, but the conversion is
; unsigned. The expected output keeps the narrowing step (cvt.u16.u32) and
; converts from the 16-bit value (cvt.rn.f32.u16).
define float @uitofp_trunc_nsw(i32 %x, i32 %y) {
44+
; CHECK-LABEL: uitofp_trunc_nsw(
45+
; CHECK: {
46+
; CHECK-NEXT: .reg .b16 %rs<2>;
47+
; CHECK-NEXT: .reg .b32 %r<5>;
48+
; CHECK-EMPTY:
49+
; CHECK-NEXT: // %bb.0:
50+
; CHECK-NEXT: ld.param.b32 %r1, [uitofp_trunc_nsw_param_0];
51+
; CHECK-NEXT: ld.param.b32 %r2, [uitofp_trunc_nsw_param_1];
52+
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
53+
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
54+
; CHECK-NEXT: cvt.rn.f32.u16 %r4, %rs1;
55+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
56+
; CHECK-NEXT: ret;
57+
%v = add i32 %x, %y
58+
%t = trunc nsw i32 %v to i16
59+
%f = uitofp i16 %t to float
60+
ret float %f
61+
}
62+
63+
; Negative test: the truncate carries only `nuw`, but the conversion is
; signed. The expected output keeps the narrowing step (cvt.u16.u32) and
; converts from the 16-bit value (cvt.rn.f32.s16).
define float @sitofp_trunc_nuw(i32 %x, i32 %y) {
64+
; CHECK-LABEL: sitofp_trunc_nuw(
65+
; CHECK: {
66+
; CHECK-NEXT: .reg .b16 %rs<2>;
67+
; CHECK-NEXT: .reg .b32 %r<5>;
68+
; CHECK-EMPTY:
69+
; CHECK-NEXT: // %bb.0:
70+
; CHECK-NEXT: ld.param.b32 %r1, [sitofp_trunc_nuw_param_0];
71+
; CHECK-NEXT: ld.param.b32 %r2, [sitofp_trunc_nuw_param_1];
72+
; CHECK-NEXT: add.s32 %r3, %r1, %r2;
73+
; CHECK-NEXT: cvt.u16.u32 %rs1, %r3;
74+
; CHECK-NEXT: cvt.rn.f32.s16 %r4, %rs1;
75+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
76+
; CHECK-NEXT: ret;
77+
%v = add i32 %x, %y
78+
%t = trunc nuw i32 %v to i16
79+
%f = sitofp i16 %t to float
80+
ret float %f
81+
}

0 commit comments

Comments
 (0)