@@ -12,18 +12,272 @@ define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) {
1212 ret <2 x half > %res
1313}
1414
15- define half @fptrunc_v2f32_v2f16_then_extract (< 2 x float > %src ) {
16- ; GFX950-LABEL: fptrunc_v2f32_v2f16_then_extract :
; Truncates a <3 x float> argument to <3 x half> and returns the vector.
; The expected code uses one scalar v_cvt_f16_f32_e32 (on v2) and one packed
; v_cvt_pk_f16_f32 (on v0, v1), then moves v2 into v1 before returning.
15+ define < 3 x half > @v_test_cvt_v3f32_v3f16 (< 3 x float > %src ) {
16+ ; GFX950-LABEL: v_test_cvt_v3f32_v3f16 :
1717; GFX950: ; %bb.0:
1818; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19+ ; GFX950-NEXT: v_cvt_f16_f32_e32 v2, v2
1920; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
20- ; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
21+ ; GFX950-NEXT: v_mov_b32_e32 v1, v2
22+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
23+ %res = fptrunc <3 x float > %src to <3 x half >
24+ ret <3 x half > %res
25+ }
26+
; Truncates a <4 x float> argument to <4 x half> and returns the vector.
; The expected code is two packed v_cvt_pk_f16_f32 conversions, one per pair
; of adjacent input registers (v0/v1 and v2/v3).
27+ define <4 x half > @v_test_cvt_v4f32_v4f16 (<4 x float > %src ) {
28+ ; GFX950-LABEL: v_test_cvt_v4f32_v4f16:
29+ ; GFX950: ; %bb.0:
30+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
32+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
33+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
34+ %res = fptrunc <4 x float > %src to <4 x half >
35+ ret <4 x half > %res
36+ }
37+
; Truncates an <8 x float> argument to <8 x half> and returns the vector.
; The expected code is four packed v_cvt_pk_f16_f32 conversions.
; NOTE(review): the function name ends in "v2f16" although the result type is
; <8 x half>; the sibling tests are named vNf32_vNf16. Consider renaming to
; v_test_cvt_v8f32_v8f16 (together with the LABEL line) - confirm nothing else
; refers to the current name first.
38+ define <8 x half > @v_test_cvt_v8f32_v2f16 (<8 x float > %src ) {
39+ ; GFX950-LABEL: v_test_cvt_v8f32_v2f16:
40+ ; GFX950: ; %bb.0:
41+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
43+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
44+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
45+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
46+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
47+ %res = fptrunc <8 x float > %src to <8 x half >
48+ ret <8 x half > %res
49+ }
50+
; Truncates a <16 x float> argument to <16 x half> and returns the vector.
; The expected code is eight packed v_cvt_pk_f16_f32 conversions, each
; consuming two adjacent input registers and writing v0 through v7 in order.
51+ define <16 x half > @v_test_cvt_v16f32_v16f16 (<16 x float > %src ) {
52+ ; GFX950-LABEL: v_test_cvt_v16f32_v16f16:
53+ ; GFX950: ; %bb.0:
54+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
56+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
57+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
58+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
59+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v4, v8, v9
60+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v5, v10, v11
61+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v6, v12, v13
62+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v7, v14, v15
63+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
64+ %res = fptrunc <16 x float > %src to <16 x half >
65+ ret <16 x half > %res
66+ }
67+
; Truncates <2 x float> to <2 x half>, extracts both elements and returns
; their half-precision sum (element 0 + element 1).
; The expected code is one packed v_cvt_pk_f16_f32 followed by a single
; v_add_f16_sdwa that reads the second operand with src1_sel:WORD_1.
68+ define half @fptrunc_v2f32_v2f16_extract_uses (<2 x float > %src ) {
69+ ; GFX950-LABEL: fptrunc_v2f32_v2f16_extract_uses:
70+ ; GFX950: ; %bb.0:
71+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+ ; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
73+ ; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2174; GFX950-NEXT: s_setpc_b64 s[30:31]
2275 %vec_half = fptrunc <2 x float > %src to <2 x half >
23- %first = extractelement <2 x half > %vec_half , i64 1
24- %second = extractelement <2 x half > %vec_half , i64 0
25- %res = fadd half %first , %second
26- ret half %res
76+ %f0 = extractelement <2 x half > %vec_half , i64 0
77+ %f1 = extractelement <2 x half > %vec_half , i64 1
78+ %rslt = fadd half %f0 , %f1
79+ ret half %rslt
80+ }
81+
; Truncates <3 x float> to <3 x half>, extracts all three elements and returns
; f2 + (f0 + f1) in half precision.
; The two check sets differ: the SDAG checks expect three scalar
; v_cvt_f16_f32_e32 conversions and two v_add_f16_e32, while the GISEL checks
; expect one scalar plus one packed conversion, an SDWA add and a v_add_f16_e32.
82+ define half @fptrunc_v3f32_v3f16_extract_uses (<3 x float > %vec_float ) {
83+ ; GFX950-SDAG-LABEL: fptrunc_v3f32_v3f16_extract_uses:
84+ ; GFX950-SDAG: ; %bb.0:
85+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
87+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
88+ ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
89+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
90+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v2, v0
91+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
92+ ;
93+ ; GFX950-GISEL-LABEL: fptrunc_v3f32_v3f16_extract_uses:
94+ ; GFX950-GISEL: ; %bb.0:
95+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96+ ; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
97+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
98+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
99+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v2, v0
100+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
101+ %vec_half = fptrunc <3 x float > %vec_float to <3 x half >
102+ %f0 = extractelement <3 x half > %vec_half , i64 0
103+ %f1 = extractelement <3 x half > %vec_half , i64 1
104+ %f2 = extractelement <3 x half > %vec_half , i64 2
105+ %sum0 = fadd half %f0 , %f1
106+ %rslt = fadd half %f2 , %sum0
107+ ret half %rslt
108+ }
109+
; Truncates <4 x float> to <4 x half>, extracts all four elements and returns
; (f0 + f1) + (f2 + f3) in half precision.
; Both check sets expect two packed v_cvt_pk_f16_f32 conversions, two SDWA adds
; (one per packed pair) and a final v_add_f16_e32; they differ in instruction
; order and in which registers hold the packed results.
110+ define half @fptrunc_v4f32_v4f16_extract_uses (<4 x float > %vec_float ) {
111+ ; GFX950-SDAG-LABEL: fptrunc_v4f32_v4f16_extract_uses:
112+ ; GFX950-SDAG: ; %bb.0:
113+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
115+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
116+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
117+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
118+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
119+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
120+ ;
121+ ; GFX950-GISEL-LABEL: fptrunc_v4f32_v4f16_extract_uses:
122+ ; GFX950-GISEL: ; %bb.0:
123+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
125+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
126+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
127+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
128+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
129+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
130+ %vec_half = fptrunc <4 x float > %vec_float to <4 x half >
131+ %f0 = extractelement <4 x half > %vec_half , i64 0
132+ %f1 = extractelement <4 x half > %vec_half , i64 1
133+ %f2 = extractelement <4 x half > %vec_half , i64 2
134+ %f3 = extractelement <4 x half > %vec_half , i64 3
135+ %sum0 = fadd half %f0 , %f1
136+ %sum1 = fadd half %f2 , %f3
137+ %rslt = fadd half %sum0 , %sum1
138+ ret half %rslt
139+ }
140+
; Truncates <8 x float> to <8 x half>, extracts all eight elements and returns
; their sum, built as a pairwise tree: adjacent elements are added first, then
; the partial sums are combined two at a time.
; Both check sets expect four packed v_cvt_pk_f16_f32 conversions, four SDWA
; adds and three v_add_f16_e32; they differ in conversion order and register use.
141+ define half @fptrunc_v8f32_v8f16_extract_uses (<8 x float > %vec_float ) {
142+ ; GFX950-SDAG-LABEL: fptrunc_v8f32_v8f16_extract_uses:
143+ ; GFX950-SDAG: ; %bb.0:
144+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v6, v6, v7
146+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v4, v4, v5
147+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
148+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
149+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
150+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
151+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v2, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
152+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v3, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
153+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
154+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
155+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
156+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
157+ ;
158+ ; GFX950-GISEL-LABEL: fptrunc_v8f32_v8f16_extract_uses:
159+ ; GFX950-GISEL: ; %bb.0:
160+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
162+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
163+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
164+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
165+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
166+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
167+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
168+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
169+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
170+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
171+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
172+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
173+ %vec_half = fptrunc <8 x float > %vec_float to <8 x half >
174+ %f0 = extractelement <8 x half > %vec_half , i64 0
175+ %f1 = extractelement <8 x half > %vec_half , i64 1
176+ %f2 = extractelement <8 x half > %vec_half , i64 2
177+ %f3 = extractelement <8 x half > %vec_half , i64 3
178+ %f4 = extractelement <8 x half > %vec_half , i64 4
179+ %f5 = extractelement <8 x half > %vec_half , i64 5
180+ %f6 = extractelement <8 x half > %vec_half , i64 6
181+ %f7 = extractelement <8 x half > %vec_half , i64 7
182+ %sum0 = fadd half %f0 , %f1
183+ %sum1 = fadd half %f2 , %f3
184+ %sum2 = fadd half %f4 , %f5
185+ %sum3 = fadd half %f6 , %f7
186+ %sum4 = fadd half %sum0 , %sum1
187+ %sum5 = fadd half %sum2 , %sum3
188+ %rslt = fadd half %sum4 , %sum5
189+ ret half %rslt
190+ }
191+
; Truncates <16 x float> to <16 x half>, extracts all sixteen elements and
; returns their sum, built as a pairwise tree: adjacent elements are added
; first (sum0..sum7), then partial sums are combined two at a time.
; Both check sets expect eight packed v_cvt_pk_f16_f32 conversions, eight SDWA
; adds and seven v_add_f16_e32; they differ in conversion order and register use.
192+ define half @fptrunc_v16f32_v16f16_extract_uses (<16 x float > %vec_float ) {
193+ ; GFX950-SDAG-LABEL: fptrunc_v16f32_v16f16_extract_uses:
194+ ; GFX950-SDAG: ; %bb.0:
195+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v14, v14, v15
197+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v12, v12, v13
198+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v10, v10, v11
199+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v8, v8, v9
200+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v6, v6, v7
201+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v4, v4, v5
202+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v2, v2, v3
203+ ; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
204+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
205+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v1, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
206+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v2, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
207+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v3, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
208+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v4, v8, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
209+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v5, v10, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
210+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v6, v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
211+ ; GFX950-SDAG-NEXT: v_add_f16_sdwa v7, v14, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
212+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
213+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
214+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v2, v4, v5
215+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v3, v6, v7
216+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
217+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v1, v2, v3
218+ ; GFX950-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
219+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
220+ ;
221+ ; GFX950-GISEL-LABEL: fptrunc_v16f32_v16f16_extract_uses:
222+ ; GFX950-GISEL: ; %bb.0:
223+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
225+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v1, v2, v3
226+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v2, v4, v5
227+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v3, v6, v7
228+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v4, v8, v9
229+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v5, v10, v11
230+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v6, v12, v13
231+ ; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v7, v14, v15
232+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
233+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
234+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v2, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
235+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v3, v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
236+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v4, v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
237+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v5, v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
238+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v6, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
239+ ; GFX950-GISEL-NEXT: v_add_f16_sdwa v7, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
240+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
241+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
242+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v2, v4, v5
243+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v3, v6, v7
244+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
245+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v1, v2, v3
246+ ; GFX950-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
247+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
248+ %vec_half = fptrunc <16 x float > %vec_float to <16 x half >
249+ %f0 = extractelement <16 x half > %vec_half , i64 0
250+ %f1 = extractelement <16 x half > %vec_half , i64 1
251+ %f2 = extractelement <16 x half > %vec_half , i64 2
252+ %f3 = extractelement <16 x half > %vec_half , i64 3
253+ %f4 = extractelement <16 x half > %vec_half , i64 4
254+ %f5 = extractelement <16 x half > %vec_half , i64 5
255+ %f6 = extractelement <16 x half > %vec_half , i64 6
256+ %f7 = extractelement <16 x half > %vec_half , i64 7
257+ %f8 = extractelement <16 x half > %vec_half , i64 8
258+ %f9 = extractelement <16 x half > %vec_half , i64 9
259+ %f10 = extractelement <16 x half > %vec_half , i64 10
260+ %f11 = extractelement <16 x half > %vec_half , i64 11
261+ %f12 = extractelement <16 x half > %vec_half , i64 12
262+ %f13 = extractelement <16 x half > %vec_half , i64 13
263+ %f14 = extractelement <16 x half > %vec_half , i64 14
264+ %f15 = extractelement <16 x half > %vec_half , i64 15
265+ %sum0 = fadd half %f0 , %f1
266+ %sum1 = fadd half %f2 , %f3
267+ %sum2 = fadd half %f4 , %f5
268+ %sum3 = fadd half %f6 , %f7
269+ %sum4 = fadd half %f8 , %f9
270+ %sum5 = fadd half %f10 , %f11
271+ %sum6 = fadd half %f12 , %f13
272+ %sum7 = fadd half %f14 , %f15
273+ %sum8 = fadd half %sum0 , %sum1
274+ %sum9 = fadd half %sum2 , %sum3
275+ %sum10 = fadd half %sum4 , %sum5
276+ %sum11 = fadd half %sum6 , %sum7
277+ %sum12 = fadd half %sum8 , %sum9
278+ %sum13 = fadd half %sum10 , %sum11
279+ %rslt = fadd half %sum12 , %sum13
280+ ret half %rslt
27281}
28282
29283define <2 x half > @v_test_cvt_v2f64_v2f16 (<2 x double > %src ) {
0 commit comments