Skip to content

Commit d0bfeed

Browse files
author
Salinas, David
authored
AMDGPU: Fix the double rounding issue in v2f64 -> v2f16 conversion (#…) (llvm#1903)
2 parents 9d3e440 + c414d7c commit d0bfeed

File tree

4 files changed

+222
-12
lines changed

4 files changed

+222
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,8 +1042,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10421042

10431043
auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC);
10441044
if (ST.hasCvtPkF16F32Inst())
1045-
FPTruncActions.legalFor(
1046-
{{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}});
1045+
FPTruncActions.legalFor({{S32, S64}, {S16, S32}, {V2S16, V2S32}});
10471046
else
10481047
FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
10491048
FPTruncActions.scalarize(0).lower();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -916,10 +916,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
916916
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
917917
}
918918

919-
if (Subtarget->hasCvtPkF16F32Inst()) {
920-
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Legal);
921-
}
922-
923919
setTargetDAGCombine({ISD::ADD,
924920
ISD::UADDO_CARRY,
925921
ISD::SUB,

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,9 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
654654
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
655655
; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
656656
; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
657-
; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
657+
; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
658+
; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
659+
; GFX950-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
658660
; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
659661
; GFX950-SDAG-NEXT: s_endpgm
660662
;
@@ -666,11 +668,11 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
666668
; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
667669
; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
668670
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
669-
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
670-
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
671-
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
672-
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
673-
; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
671+
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
672+
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
673+
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
674+
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
675+
; GFX950-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
674676
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
675677
; GFX950-GISEL-NEXT: s_endpgm
676678
;
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950,GFX950-SAFE-SDAG %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 < %s | FileCheck -check-prefixes=GFX950,GFX950-SAFE-GISEL %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX950,GFX950-UNSAFE %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX950,GFX950-UNSAFE %s
6+
7+
define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) {
8+
; GFX950-LABEL: v_test_cvt_v2f32_v2f16:
9+
; GFX950: ; %bb.0:
10+
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11+
; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
12+
; GFX950-NEXT: s_setpc_b64 s[30:31]
13+
%res = fptrunc <2 x float> %src to <2 x half>
14+
ret <2 x half> %res
15+
}
16+
17+
define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
18+
; GFX950-SAFE-SDAG-LABEL: v_test_cvt_v2f64_v2f16:
19+
; GFX950-SAFE-SDAG: ; %bb.0:
20+
; GFX950-SAFE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s0, 0x1ff
22+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v1, s0, v0
23+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
24+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v1
25+
; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s1, 0xffe
26+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
27+
; GFX950-SAFE-SDAG-NEXT: v_bfe_u32 v5, v1, 20, 11
28+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v4, s1, v0
29+
; GFX950-SAFE-SDAG-NEXT: v_sub_u32_e32 v6, 0x3f1, v5
30+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v4, 0x1000, v0
31+
; GFX950-SAFE-SDAG-NEXT: v_med3_i32 v6, v6, 0, 13
32+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v7, v6, v4
33+
; GFX950-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v6, v6, v7
34+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v6, v4
35+
; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v5, 0xfffffc10, v5
36+
; GFX950-SAFE-SDAG-NEXT: v_lshl_or_b32 v6, v5, 12, v0
37+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
38+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v4, v7, v4
39+
; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 1, v5
40+
; GFX950-SAFE-SDAG-NEXT: s_movk_i32 s2, 0x40f
41+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1
42+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
43+
; GFX950-SAFE-SDAG-NEXT: v_and_b32_e32 v6, 7, v4
44+
; GFX950-SAFE-SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 5, v6
45+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v4, 2, v4
46+
; GFX950-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x8000
47+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
48+
; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
49+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
50+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
51+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v6, v6, v7
52+
; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v4, v4, v6
53+
; GFX950-SAFE-SDAG-NEXT: v_mov_b32_e32 v6, 0x7c00
54+
; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 31, v5
55+
; GFX950-SAFE-SDAG-NEXT: v_mov_b32_e32 v7, 0x7e00
56+
; GFX950-SAFE-SDAG-NEXT: s_nop 0
57+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
58+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
59+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
60+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
61+
; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s2, v5
62+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
63+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
64+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v0, v1, s3, v0
65+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v3, s0, v2
66+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
67+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 8, v3
68+
; GFX950-SAFE-SDAG-NEXT: v_bfe_u32 v4, v3, 20, 11
69+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
70+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v2, s1, v1
71+
; GFX950-SAFE-SDAG-NEXT: v_sub_u32_e32 v5, 0x3f1, v4
72+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v2, 0x1000, v1
73+
; GFX950-SAFE-SDAG-NEXT: v_med3_i32 v5, v5, 0, 13
74+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v8, v5, v2
75+
; GFX950-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v5, v5, v8
76+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v5, v2
77+
; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v4, 0xfffffc10, v4
78+
; GFX950-SAFE-SDAG-NEXT: v_lshl_or_b32 v5, v4, 12, v1
79+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
80+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v2, v8, v2
81+
; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 1, v4
82+
; GFX950-SAFE-SDAG-NEXT: s_mov_b32 s0, 0x5040100
83+
; GFX950-SAFE-SDAG-NEXT: s_nop 0
84+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
85+
; GFX950-SAFE-SDAG-NEXT: v_and_b32_e32 v5, 7, v2
86+
; GFX950-SAFE-SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 5, v5
87+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 2, v2
88+
; GFX950-SAFE-SDAG-NEXT: s_nop 0
89+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
90+
; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 3, v5
91+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
92+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
93+
; GFX950-SAFE-SDAG-NEXT: v_or_b32_e32 v5, v5, v8
94+
; GFX950-SAFE-SDAG-NEXT: v_add_u32_e32 v2, v2, v5
95+
; GFX950-SAFE-SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 31, v4
96+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
97+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
98+
; GFX950-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
99+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
100+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
101+
; GFX950-SAFE-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, s2, v4
102+
; GFX950-SAFE-SDAG-NEXT: s_nop 1
103+
; GFX950-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
104+
; GFX950-SAFE-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v3
105+
; GFX950-SAFE-SDAG-NEXT: v_and_or_b32 v1, v2, s3, v1
106+
; GFX950-SAFE-SDAG-NEXT: v_perm_b32 v0, v1, v0, s0
107+
; GFX950-SAFE-SDAG-NEXT: s_setpc_b64 s[30:31]
108+
;
109+
; GFX950-SAFE-GISEL-LABEL: v_test_cvt_v2f64_v2f16:
110+
; GFX950-SAFE-GISEL: ; %bb.0:
111+
; GFX950-SAFE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s0, 0x1ff
113+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v1, s0, v0
114+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
115+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v1
116+
; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s1, 0xffe
117+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
118+
; GFX950-SAFE-GISEL-NEXT: v_bfe_u32 v5, v1, 20, 11
119+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v4, s1, v0
120+
; GFX950-SAFE-GISEL-NEXT: v_sub_u32_e32 v6, 0x3f1, v5
121+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v4, 0x1000, v0
122+
; GFX950-SAFE-GISEL-NEXT: v_med3_i32 v6, v6, 0, 13
123+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v7, v6, v4
124+
; GFX950-SAFE-GISEL-NEXT: v_lshlrev_b32_e32 v6, v6, v7
125+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v6, v4
126+
; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v5, 0xfffffc10, v5
127+
; GFX950-SAFE-GISEL-NEXT: v_lshl_or_b32 v6, v5, 12, v0
128+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
129+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v4, v7, v4
130+
; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 1, v5
131+
; GFX950-SAFE-GISEL-NEXT: s_movk_i32 s2, 0x40f
132+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v1
133+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
134+
; GFX950-SAFE-GISEL-NEXT: v_and_b32_e32 v6, 7, v4
135+
; GFX950-SAFE-GISEL-NEXT: v_cmp_lt_i32_e32 vcc, 5, v6
136+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v4
137+
; GFX950-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x8000
138+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
139+
; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v6
140+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
141+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
142+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
143+
; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v4, v4, v6
144+
; GFX950-SAFE-GISEL-NEXT: v_mov_b32_e32 v6, 0x7c00
145+
; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 31, v5
146+
; GFX950-SAFE-GISEL-NEXT: v_mov_b32_e32 v7, 0x7e00
147+
; GFX950-SAFE-GISEL-NEXT: s_nop 0
148+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
149+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
150+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
151+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
152+
; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s2, v5
153+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
154+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
155+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v0, v1, s3, v0
156+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v3, s0, v2
157+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
158+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v3
159+
; GFX950-SAFE-GISEL-NEXT: v_bfe_u32 v4, v3, 20, 11
160+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
161+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v2, s1, v1
162+
; GFX950-SAFE-GISEL-NEXT: v_sub_u32_e32 v5, 0x3f1, v4
163+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v2, 0x1000, v1
164+
; GFX950-SAFE-GISEL-NEXT: v_med3_i32 v5, v5, 0, 13
165+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v8, v5, v2
166+
; GFX950-SAFE-GISEL-NEXT: v_lshlrev_b32_e32 v5, v5, v8
167+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, v5, v2
168+
; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v4, 0xfffffc10, v4
169+
; GFX950-SAFE-GISEL-NEXT: v_lshl_or_b32 v5, v4, 12, v1
170+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
171+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v2, v8, v2
172+
; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 1, v4
173+
; GFX950-SAFE-GISEL-NEXT: s_mov_b32 s0, 0x5040100
174+
; GFX950-SAFE-GISEL-NEXT: s_nop 0
175+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
176+
; GFX950-SAFE-GISEL-NEXT: v_and_b32_e32 v5, 7, v2
177+
; GFX950-SAFE-GISEL-NEXT: v_cmp_lt_i32_e32 vcc, 5, v5
178+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 2, v2
179+
; GFX950-SAFE-GISEL-NEXT: s_nop 0
180+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
181+
; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 3, v5
182+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
183+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
184+
; GFX950-SAFE-GISEL-NEXT: v_or_b32_e32 v5, v5, v8
185+
; GFX950-SAFE-GISEL-NEXT: v_add_u32_e32 v2, v2, v5
186+
; GFX950-SAFE-GISEL-NEXT: v_cmp_gt_i32_e32 vcc, 31, v4
187+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
188+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
189+
; GFX950-SAFE-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
190+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
191+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v7, vcc
192+
; GFX950-SAFE-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s2, v4
193+
; GFX950-SAFE-GISEL-NEXT: s_nop 1
194+
; GFX950-SAFE-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
195+
; GFX950-SAFE-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
196+
; GFX950-SAFE-GISEL-NEXT: v_and_or_b32 v1, v2, s3, v1
197+
; GFX950-SAFE-GISEL-NEXT: v_perm_b32 v0, v1, v0, s0
198+
; GFX950-SAFE-GISEL-NEXT: s_setpc_b64 s[30:31]
199+
;
200+
; GFX950-UNSAFE-LABEL: v_test_cvt_v2f64_v2f16:
201+
; GFX950-UNSAFE: ; %bb.0:
202+
; GFX950-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203+
; GFX950-UNSAFE-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
204+
; GFX950-UNSAFE-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
205+
; GFX950-UNSAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
206+
; GFX950-UNSAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
207+
; GFX950-UNSAFE-NEXT: s_mov_b32 s0, 0x5040100
208+
; GFX950-UNSAFE-NEXT: v_perm_b32 v0, v1, v0, s0
209+
; GFX950-UNSAFE-NEXT: s_setpc_b64 s[30:31]
210+
%res = fptrunc <2 x double> %src to <2 x half>
211+
ret <2 x half> %res
212+
}
213+

0 commit comments

Comments
 (0)