@@ -417,3 +417,86 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
417417 %ret = call i64 @llvm.amdgcn.update.dpp.i64 (i64 %x , i64 %y , i32 1 , i32 1 , i32 1 , i1 false )
418418 ret i64 %ret
419419}
420+
; Test: a whole-wave function (amdgpu_gfx_whole_wave) that takes `inreg`
; arguments of several types (i32, <4 x i32>, float and two addrspace(5)
; pointers) after the leading i1 %active argument, and stores each value.
;
; Per the expected output below, the inreg values are read from SGPRs
; (s4 = %i32, s5-s8 = %v4i32, s9 = %float, s10 = %ptr, s11 = %ptr2), copied
; into v0-v5 and written out with scratch stores. Because v0-v5 are used,
; the expected prologue saves them at s32 (offsets 0..20) after
; s_xor_saveexec, and the epilogue reloads them before exec is restored.
;
; NOTE(review): the DAGISEL/GISEL lines look autogenerated (presumably by
; utils/update_llc_test_checks.py - confirm against the file header);
; regenerate them rather than editing by hand.
421+ define amdgpu_gfx_whole_wave void @inreg_args (i1 %active , i32 inreg %i32 , <4 x i32 > inreg %v4i32 , float inreg %float , ptr addrspace (5 ) inreg %ptr , ptr addrspace (5 ) inreg %ptr2 ) {
422+ ; DAGISEL-LABEL: inreg_args:
423+ ; DAGISEL: ; %bb.0:
424+ ; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
425+ ; DAGISEL-NEXT: s_wait_expcnt 0x0
426+ ; DAGISEL-NEXT: s_wait_samplecnt 0x0
427+ ; DAGISEL-NEXT: s_wait_bvhcnt 0x0
428+ ; DAGISEL-NEXT: s_wait_kmcnt 0x0
429+ ; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
430+ ; DAGISEL-NEXT: s_clause 0x5
431+ ; DAGISEL-NEXT: scratch_store_b32 off, v0, s32
432+ ; DAGISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
433+ ; DAGISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
434+ ; DAGISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
435+ ; DAGISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
436+ ; DAGISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
437+ ; DAGISEL-NEXT: s_mov_b32 exec_lo, -1
438+ ; DAGISEL-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9
439+ ; DAGISEL-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
440+ ; DAGISEL-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s8
441+ ; DAGISEL-NEXT: scratch_store_b32 off, v4, s10
442+ ; DAGISEL-NEXT: s_clause 0x1
443+ ; DAGISEL-NEXT: scratch_store_b128 off, v[0:3], s11
444+ ; DAGISEL-NEXT: scratch_store_b32 off, v5, s11
445+ ; DAGISEL-NEXT: s_wait_alu 0xfffe
446+ ; DAGISEL-NEXT: s_xor_b32 exec_lo, s0, -1
447+ ; DAGISEL-NEXT: s_clause 0x5
448+ ; DAGISEL-NEXT: scratch_load_b32 v0, off, s32
449+ ; DAGISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
450+ ; DAGISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
451+ ; DAGISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
452+ ; DAGISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
453+ ; DAGISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
454+ ; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
455+ ; DAGISEL-NEXT: s_wait_loadcnt 0x0
456+ ; DAGISEL-NEXT: s_setpc_b64 s[30:31]
457+ ;
458+ ; GISEL-LABEL: inreg_args:
459+ ; GISEL: ; %bb.0:
460+ ; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
461+ ; GISEL-NEXT: s_wait_expcnt 0x0
462+ ; GISEL-NEXT: s_wait_samplecnt 0x0
463+ ; GISEL-NEXT: s_wait_bvhcnt 0x0
464+ ; GISEL-NEXT: s_wait_kmcnt 0x0
465+ ; GISEL-NEXT: s_xor_saveexec_b32 s34, -1
466+ ; GISEL-NEXT: s_clause 0x5
467+ ; GISEL-NEXT: scratch_store_b32 off, v0, s32
468+ ; GISEL-NEXT: scratch_store_b32 off, v1, s32 offset:4
469+ ; GISEL-NEXT: scratch_store_b32 off, v2, s32 offset:8
470+ ; GISEL-NEXT: scratch_store_b32 off, v3, s32 offset:12
471+ ; GISEL-NEXT: scratch_store_b32 off, v4, s32 offset:16
472+ ; GISEL-NEXT: scratch_store_b32 off, v5, s32 offset:20
473+ ; GISEL-NEXT: s_mov_b32 exec_lo, -1
474+ ; GISEL-NEXT: s_mov_b32 s0, s5
475+ ; GISEL-NEXT: s_mov_b32 s1, s6
476+ ; GISEL-NEXT: s_mov_b32 s2, s7
477+ ; GISEL-NEXT: s_mov_b32 s3, s8
478+ ; GISEL-NEXT: v_mov_b32_e32 v4, s4
479+ ; GISEL-NEXT: s_wait_alu 0xfffe
480+ ; GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
481+ ; GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
482+ ; GISEL-NEXT: v_mov_b32_e32 v5, s9
483+ ; GISEL-NEXT: scratch_store_b32 off, v4, s10
484+ ; GISEL-NEXT: s_clause 0x1
485+ ; GISEL-NEXT: scratch_store_b128 off, v[0:3], s11
486+ ; GISEL-NEXT: scratch_store_b32 off, v5, s11
487+ ; GISEL-NEXT: s_xor_b32 exec_lo, s34, -1
488+ ; GISEL-NEXT: s_clause 0x5
489+ ; GISEL-NEXT: scratch_load_b32 v0, off, s32
490+ ; GISEL-NEXT: scratch_load_b32 v1, off, s32 offset:4
491+ ; GISEL-NEXT: scratch_load_b32 v2, off, s32 offset:8
492+ ; GISEL-NEXT: scratch_load_b32 v3, off, s32 offset:12
493+ ; GISEL-NEXT: scratch_load_b32 v4, off, s32 offset:16
494+ ; GISEL-NEXT: scratch_load_b32 v5, off, s32 offset:20
495+ ; GISEL-NEXT: s_mov_b32 exec_lo, s34
496+ ; GISEL-NEXT: s_wait_loadcnt 0x0
497+ ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Use every inreg argument so each one shows up in the expected output:
; %i32 is stored through %ptr, while the vector and the float are both
; stored through %ptr2 (to the same address, vector first).
498+ store i32 %i32 , ptr addrspace (5 ) %ptr
499+ store <4 x i32 > %v4i32 , ptr addrspace (5 ) %ptr2
500+ store float %float , ptr addrspace (5 ) %ptr2
501+ ret void
502+ }
0 commit comments