@@ -189,3 +189,150 @@ define <2 x half> @call_nofpclass_intrinsic_v2f16(float %x, float %y, float %z,
189189 %min = select nsz <2 x i1 > %lt , <2 x half > %call0 , <2 x half > %call1
190190 ret <2 x half > %min
191191}
192+
; @aggregate: takes no arguments and returns a { double, double } struct whose
; return value is annotated nofpclass(nan inf). The body is a single call to
; @aggregate itself (that call site carries no nofpclass attribute) and the
; call result is returned unchanged.
; NOTE(review): presumably a reduced case covering nofpclass on an aggregate
; return type -- confirm against the change that introduced this test.
193+ define nofpclass(nan inf) { double , double } @aggregate () {
194+ ; CHECK-LABEL: aggregate:
195+ ; CHECK: ; %bb.0: ; %entry
196+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197+ ; CHECK-NEXT: s_mov_b32 s16, s33
198+ ; CHECK-NEXT: s_mov_b32 s33, s32
199+ ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
200+ ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
201+ ; CHECK-NEXT: s_mov_b64 exec, s[18:19]
202+ ; CHECK-NEXT: s_addk_i32 s32, 0x400
203+ ; CHECK-NEXT: v_writelane_b32 v40, s16, 2
204+ ; CHECK-NEXT: s_getpc_b64 s[16:17]
205+ ; CHECK-NEXT: s_add_u32 s16, s16, aggregate@gotpcrel32@lo+4
206+ ; CHECK-NEXT: s_addc_u32 s17, s17, aggregate@gotpcrel32@hi+12
207+ ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
208+ ; CHECK-NEXT: v_writelane_b32 v40, s30, 0
209+ ; CHECK-NEXT: v_writelane_b32 v40, s31, 1
210+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
211+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
212+ ; CHECK-NEXT: v_readlane_b32 s31, v40, 1
213+ ; CHECK-NEXT: v_readlane_b32 s30, v40, 0
214+ ; CHECK-NEXT: s_mov_b32 s32, s33
215+ ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
216+ ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
217+ ; CHECK-NEXT: s_mov_b64 exec, s[6:7]
218+ ; CHECK-NEXT: s_mov_b32 s33, s4
219+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
220+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
; In the expected assembly above the callee address is loaded through a
; gotpcrel32 relocation and called with s_swappc_b64; no floating-point
; instruction appears between the call and the return.
222+ entry:
223+ %call.i.i = call { double , double } @aggregate ()
224+ ret { double , double } %call.i.i
225+ }
226+
; External declaration only (no body in this file): hidden visibility, no
; arguments, returns { float , float } with nofpclass(nan inf) on the return.
227+ declare hidden nofpclass(nan inf) { float , float } @aggregate_f32 ()
228+
; @aggregate_use: calls @aggregate_f32 with nofpclass(nan inf) on the call
; site, extracts both float elements of the returned struct, takes
; llvm.minnum.f32 of each element against %z, and returns the two minima
; packed into a { float , float }.
;
; In the expected assembly only v40 (a copy of the incoming v0 made before the
; call) is passed through v_max_f32 with itself; v0 and v1 after the call feed
; the two v_min_f32 instructions directly.
; NOTE(review): presumably the self-max is the NaN-quieting step for %z, and
; the call results skip it because of the nofpclass(nan) annotation -- confirm.
229+ define { float , float } @aggregate_use (float %z ) {
230+ ; CHECK-LABEL: aggregate_use:
231+ ; CHECK: ; %bb.0:
232+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233+ ; CHECK-NEXT: s_mov_b32 s16, s33
234+ ; CHECK-NEXT: s_mov_b32 s33, s32
235+ ; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
236+ ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
237+ ; CHECK-NEXT: s_mov_b64 exec, s[18:19]
238+ ; CHECK-NEXT: v_writelane_b32 v41, s16, 2
239+ ; CHECK-NEXT: s_addk_i32 s32, 0x400
240+ ; CHECK-NEXT: v_writelane_b32 v41, s30, 0
241+ ; CHECK-NEXT: s_getpc_b64 s[16:17]
242+ ; CHECK-NEXT: s_add_u32 s16, s16, aggregate_f32@rel32@lo+4
243+ ; CHECK-NEXT: s_addc_u32 s17, s17, aggregate_f32@rel32@hi+12
244+ ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
245+ ; CHECK-NEXT: v_writelane_b32 v41, s31, 1
246+ ; CHECK-NEXT: v_mov_b32_e32 v40, v0
247+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
248+ ; CHECK-NEXT: v_max_f32_e32 v2, v40, v40
249+ ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
250+ ; CHECK-NEXT: v_min_f32_e32 v0, v0, v2
251+ ; CHECK-NEXT: v_min_f32_e32 v1, v1, v2
252+ ; CHECK-NEXT: v_readlane_b32 s31, v41, 1
253+ ; CHECK-NEXT: v_readlane_b32 s30, v41, 0
254+ ; CHECK-NEXT: s_mov_b32 s32, s33
255+ ; CHECK-NEXT: v_readlane_b32 s4, v41, 2
256+ ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
257+ ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
258+ ; CHECK-NEXT: s_mov_b64 exec, s[6:7]
259+ ; CHECK-NEXT: s_mov_b32 s33, s4
260+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
261+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
; The nofpclass(nan inf) annotation is on the call site here as well as on the
; declaration of @aggregate_f32.
262+ %call = call nofpclass(nan inf) { float , float } @aggregate_f32 ()
; Split the returned struct into its two float elements.
263+ %i = extractvalue { float , float } %call , 0
264+ %i1 = extractvalue { float , float } %call , 1
; Each element is combined with the same %z operand.
265+ %min0 = call float @llvm.minnum.f32 (float %i , float %z )
266+ %min1 = call float @llvm.minnum.f32 (float %i1 , float %z )
; Rebuild the result struct starting from poison.
267+ %insert.0 = insertvalue { float , float } poison, float %min0 , 0
268+ %insert.1 = insertvalue { float , float } %insert.0 , float %min1 , 1
269+ ret { float , float } %insert.1
270+ }
271+
; @func_v5f64: internal helper that performs one volatile load of a
; <5 x double> through the addrspace(1) pointer argument and returns the loaded
; vector. It is the callee used by
; @call_nofpclass_funcs_v5f64_non_mvt_vector.
;
; The expected assembly splits the load into three global loads at offsets
; 0, 16 and 32 (dwordx4, dwordx4, dwordx2), each followed by s_waitcnt vmcnt(0),
; leaving the result in v[0:9].
272+ define internal <5 x double > @func_v5f64 (ptr addrspace (1 ) %ptr ) {
273+ ; CHECK-LABEL: func_v5f64:
274+ ; CHECK: ; %bb.0:
275+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276+ ; CHECK-NEXT: v_mov_b32_e32 v11, v1
277+ ; CHECK-NEXT: v_mov_b32_e32 v10, v0
278+ ; CHECK-NEXT: global_load_dwordx4 v[0:3], v[10:11], off glc
279+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
280+ ; CHECK-NEXT: global_load_dwordx4 v[4:7], v[10:11], off offset:16 glc
281+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
282+ ; CHECK-NEXT: global_load_dwordx2 v[8:9], v[10:11], off offset:32 glc
283+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
284+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
285+ %ld = load volatile <5 x double >, ptr addrspace (1 ) %ptr
286+ ret <5 x double > %ld
287+ }
288+
; @call_nofpclass_funcs_v5f64_non_mvt_vector: calls @func_v5f64 twice with the
; same pointer, both call sites annotated nofpclass(nan), and returns
; llvm.minnum.v5f64 of the two results.
;
; In the expected assembly the first call's result is copied from v[0:9] to
; v[12:21], the pointer is restored from v23/v22 for the second call, and the
; minimum is computed by five v_min_f64 instructions with no other
; floating-point instruction in between.
; NOTE(review): the name suggests <5 x double> was chosen because it is not a
; simple machine value type vector -- confirm against the originating change.
289+ define <5 x double > @call_nofpclass_funcs_v5f64_non_mvt_vector (ptr addrspace (1 ) %ptr ) {
290+ ; CHECK-LABEL: call_nofpclass_funcs_v5f64_non_mvt_vector:
291+ ; CHECK: ; %bb.0:
292+ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293+ ; CHECK-NEXT: s_mov_b32 s18, s33
294+ ; CHECK-NEXT: s_mov_b32 s33, s32
295+ ; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
296+ ; CHECK-NEXT: buffer_store_dword v24, off, s[0:3], s33 ; 4-byte Folded Spill
297+ ; CHECK-NEXT: s_mov_b64 exec, s[16:17]
298+ ; CHECK-NEXT: s_addk_i32 s32, 0x400
299+ ; CHECK-NEXT: v_writelane_b32 v24, s30, 0
300+ ; CHECK-NEXT: s_getpc_b64 s[16:17]
301+ ; CHECK-NEXT: s_add_u32 s16, s16, func_v5f64@rel32@lo+4
302+ ; CHECK-NEXT: s_addc_u32 s17, s17, func_v5f64@rel32@hi+12
303+ ; CHECK-NEXT: v_writelane_b32 v24, s31, 1
304+ ; CHECK-NEXT: v_mov_b32_e32 v22, v1
305+ ; CHECK-NEXT: v_mov_b32_e32 v23, v0
306+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
307+ ; CHECK-NEXT: v_mov_b32_e32 v12, v0
308+ ; CHECK-NEXT: v_mov_b32_e32 v13, v1
309+ ; CHECK-NEXT: v_mov_b32_e32 v0, v23
310+ ; CHECK-NEXT: v_mov_b32_e32 v1, v22
311+ ; CHECK-NEXT: v_mov_b32_e32 v14, v2
312+ ; CHECK-NEXT: v_mov_b32_e32 v15, v3
313+ ; CHECK-NEXT: v_mov_b32_e32 v16, v4
314+ ; CHECK-NEXT: v_mov_b32_e32 v17, v5
315+ ; CHECK-NEXT: v_mov_b32_e32 v18, v6
316+ ; CHECK-NEXT: v_mov_b32_e32 v19, v7
317+ ; CHECK-NEXT: v_mov_b32_e32 v20, v8
318+ ; CHECK-NEXT: v_mov_b32_e32 v21, v9
319+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
320+ ; CHECK-NEXT: v_min_f64 v[0:1], v[12:13], v[0:1]
321+ ; CHECK-NEXT: v_min_f64 v[2:3], v[14:15], v[2:3]
322+ ; CHECK-NEXT: v_min_f64 v[4:5], v[16:17], v[4:5]
323+ ; CHECK-NEXT: v_min_f64 v[6:7], v[18:19], v[6:7]
324+ ; CHECK-NEXT: v_min_f64 v[8:9], v[20:21], v[8:9]
325+ ; CHECK-NEXT: v_readlane_b32 s31, v24, 1
326+ ; CHECK-NEXT: v_readlane_b32 s30, v24, 0
327+ ; CHECK-NEXT: s_mov_b32 s32, s33
328+ ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
329+ ; CHECK-NEXT: buffer_load_dword v24, off, s[0:3], s33 ; 4-byte Folded Reload
330+ ; CHECK-NEXT: s_mov_b64 exec, s[4:5]
331+ ; CHECK-NEXT: s_mov_b32 s33, s18
332+ ; CHECK-NEXT: s_waitcnt vmcnt(0)
333+ ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Both calls pass the same %ptr; the callee's load is volatile, so the two
; calls are kept as separate calls in the expected assembly.
334+ %call0 = call nofpclass(nan) <5 x double > @func_v5f64 (ptr addrspace (1 ) %ptr )
335+ %call1 = call nofpclass(nan) <5 x double > @func_v5f64 (ptr addrspace (1 ) %ptr )
336+ %min = call <5 x double > @llvm.minnum.v5f64 (<5 x double > %call0 , <5 x double > %call1 )
337+ ret <5 x double > %min
338+ }
0 commit comments