@@ -72,46 +72,82 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
7272
7373; Should not wait after the call before memory
7474define amdgpu_kernel void @call_no_wait_after_call (ptr addrspace (1 ) %ptr , i32 ) #0 {
75- ; GCN-LABEL: call_no_wait_after_call:
76- ; GCN: ; %bb.0:
77- ; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
78- ; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
79- ; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
80- ; GCN-NEXT: s_add_u32 s0, s0, s11
81- ; GCN-NEXT: s_addc_u32 s1, s1, 0
82- ; GCN-NEXT: s_getpc_b64 s[8:9]
83- ; GCN-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
84- ; GCN-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
85- ; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
86- ; GCN-NEXT: v_mov_b32_e32 v0, 0
87- ; GCN-NEXT: s_mov_b32 s32, 0
88- ; GCN-NEXT: v_mov_b32_e32 v40, 0
89- ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
90- ; GCN-NEXT: global_store_dword v40, v40, s[34:35]
91- ; GCN-NEXT: s_endpgm
75+ ; SDAG-LABEL: call_no_wait_after_call:
76+ ; SDAG: ; %bb.0:
77+ ; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
78+ ; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
79+ ; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
80+ ; SDAG-NEXT: s_add_u32 s0, s0, s11
81+ ; SDAG-NEXT: s_addc_u32 s1, s1, 0
82+ ; SDAG-NEXT: s_getpc_b64 s[8:9]
83+ ; SDAG-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
84+ ; SDAG-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
85+ ; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
86+ ; SDAG-NEXT: v_mov_b32_e32 v0, 0
87+ ; SDAG-NEXT: s_mov_b32 s32, 0
88+ ; SDAG-NEXT: v_mov_b32_e32 v40, 0
89+ ; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
90+ ; SDAG-NEXT: global_store_dword v40, v40, s[34:35]
91+ ; SDAG-NEXT: s_endpgm
92+ ;
93+ ; GISEL-LABEL: call_no_wait_after_call:
94+ ; GISEL: ; %bb.0:
95+ ; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
96+ ; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
97+ ; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
98+ ; GISEL-NEXT: s_add_u32 s0, s0, s11
99+ ; GISEL-NEXT: s_addc_u32 s1, s1, 0
100+ ; GISEL-NEXT: s_getpc_b64 s[8:9]
101+ ; GISEL-NEXT: s_add_u32 s8, s8, func@rel32@lo+4
102+ ; GISEL-NEXT: s_addc_u32 s9, s9, func@rel32@hi+12
103+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
104+ ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
105+ ; GISEL-NEXT: s_mov_b32 s32, 0
106+ ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
107+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
108+ ; GISEL-NEXT: global_store_dword v0, v0, s[34:35]
109+ ; GISEL-NEXT: s_endpgm
92110 call void @func (i32 0 )
93111 store i32 0 , ptr addrspace (1 ) %ptr
94112 ret void
95113}
96114
97115define amdgpu_kernel void @call_no_wait_after_call_return_val (ptr addrspace (1 ) %ptr , i32 ) #0 {
98- ; GCN-LABEL: call_no_wait_after_call_return_val:
99- ; GCN: ; %bb.0:
100- ; GCN-NEXT: s_add_u32 flat_scratch_lo, s8, s11
101- ; GCN-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
102- ; GCN-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
103- ; GCN-NEXT: s_add_u32 s0, s0, s11
104- ; GCN-NEXT: s_addc_u32 s1, s1, 0
105- ; GCN-NEXT: s_getpc_b64 s[8:9]
106- ; GCN-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
107- ; GCN-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
108- ; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
109- ; GCN-NEXT: v_mov_b32_e32 v0, 0
110- ; GCN-NEXT: s_mov_b32 s32, 0
111- ; GCN-NEXT: v_mov_b32_e32 v40, 0
112- ; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
113- ; GCN-NEXT: global_store_dword v40, v0, s[34:35]
114- ; GCN-NEXT: s_endpgm
116+ ; SDAG-LABEL: call_no_wait_after_call_return_val:
117+ ; SDAG: ; %bb.0:
118+ ; SDAG-NEXT: s_add_u32 flat_scratch_lo, s8, s11
119+ ; SDAG-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
120+ ; SDAG-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
121+ ; SDAG-NEXT: s_add_u32 s0, s0, s11
122+ ; SDAG-NEXT: s_addc_u32 s1, s1, 0
123+ ; SDAG-NEXT: s_getpc_b64 s[8:9]
124+ ; SDAG-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
125+ ; SDAG-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
126+ ; SDAG-NEXT: s_mov_b64 s[6:7], s[4:5]
127+ ; SDAG-NEXT: v_mov_b32_e32 v0, 0
128+ ; SDAG-NEXT: s_mov_b32 s32, 0
129+ ; SDAG-NEXT: v_mov_b32_e32 v40, 0
130+ ; SDAG-NEXT: s_swappc_b64 s[30:31], s[8:9]
131+ ; SDAG-NEXT: global_store_dword v40, v0, s[34:35]
132+ ; SDAG-NEXT: s_endpgm
133+ ;
134+ ; GISEL-LABEL: call_no_wait_after_call_return_val:
135+ ; GISEL: ; %bb.0:
136+ ; GISEL-NEXT: s_add_u32 flat_scratch_lo, s8, s11
137+ ; GISEL-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
138+ ; GISEL-NEXT: s_load_dwordx2 s[34:35], s[6:7], 0x0
139+ ; GISEL-NEXT: s_add_u32 s0, s0, s11
140+ ; GISEL-NEXT: s_addc_u32 s1, s1, 0
141+ ; GISEL-NEXT: s_getpc_b64 s[8:9]
142+ ; GISEL-NEXT: s_add_u32 s8, s8, func.return@rel32@lo+4
143+ ; GISEL-NEXT: s_addc_u32 s9, s9, func.return@rel32@hi+12
144+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
145+ ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
146+ ; GISEL-NEXT: s_mov_b32 s32, 0
147+ ; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
148+ ; GISEL-NEXT: v_mov_b32_e32 v1, 0
149+ ; GISEL-NEXT: global_store_dword v1, v0, s[34:35]
150+ ; GISEL-NEXT: s_endpgm
115151 %rv = call i32 @func.return (i32 0 )
116152 store i32 %rv , ptr addrspace (1 ) %ptr
117153 ret void
0 commit comments