Skip to content

Commit 5fda6ec

Browse files
committed
[AMDGPU][SIPreEmitPeephole] Missing condition in mustRetainExeczBranch
If the code in the "then" block modifies the exec mask, we must retain the s_cbranch_execz branch. Consider this example:

    s_cbranch_execz after
    s_or_b32 exec_lo, exec_lo, -1
  after:
    ...

If the branch is removed, exec is never zero when we reach `after`, whereas with the branch in place it would have been zero whenever the branch was taken.
1 parent d3b77a9 commit 5fda6ec

File tree

6 files changed

+41
-16
lines changed

6 files changed

+41
-16
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,9 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
369369
if (MI.isMetaInstruction())
370370
continue;
371371

372+
if (MI.modifiesRegister(AMDGPU::EXEC, nullptr))
373+
return true;
374+
372375
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
373376
return true;
374377

llvm/test/CodeGen/AMDGPU/cse-convergent.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ define i32 @test(i32 %val, i32 %cond) {
1919
; GCN-NEXT: v_mov_b32_e32 v4, v2
2020
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
2121
; GCN-NEXT: s_and_saveexec_b32 s4, vcc_lo
22+
; GCN-NEXT: s_cbranch_execz .LBB0_2
2223
; GCN-NEXT: ; %bb.1: ; %if
2324
; GCN-NEXT: s_or_saveexec_b32 s5, -1
2425
; GCN-NEXT: v_mov_b32_e32 v2, 0
2526
; GCN-NEXT: v_cndmask_b32_e64 v3, 0, v0, s5
2627
; GCN-NEXT: v_mov_b32_dpp v2, v3 row_xmask:1 row_mask:0xf bank_mask:0xf
2728
; GCN-NEXT: s_mov_b32 exec_lo, s5
2829
; GCN-NEXT: v_mov_b32_e32 v5, v2
29-
; GCN-NEXT: ; %bb.2: ; %end
30+
; GCN-NEXT: .LBB0_2: ; %end
3031
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
3132
; GCN-NEXT: v_add_nc_u32_e32 v0, v4, v5
3233
; GCN-NEXT: s_xor_saveexec_b32 s4, -1

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w32.ll

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
119119
; GISEL12-NEXT: s_mov_b32 s7, s4
120120
; GISEL12-NEXT: s_wait_alu 0xfffe
121121
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
122+
; GISEL12-NEXT: s_cbranch_execz .LBB1_2
122123
; GISEL12-NEXT: ; %bb.1: ; %shader
123124
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
124125
; GISEL12-NEXT: s_wait_alu 0xfffe
@@ -129,7 +130,8 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
129130
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
130131
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
131132
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v10
132-
; GISEL12-NEXT: ; %bb.2: ; %tail
133+
; GISEL12-NEXT: .LBB1_2: ; %tail
134+
; GISEL12-NEXT: s_wait_alu 0xfffe
133135
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
134136
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
135137
; GISEL12-NEXT: s_wait_alu 0xfffe
@@ -148,6 +150,7 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
148150
; DAGISEL12-NEXT: s_mov_b32 s6, s3
149151
; DAGISEL12-NEXT: s_wait_alu 0xfffe
150152
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
153+
; DAGISEL12-NEXT: s_cbranch_execz .LBB1_2
151154
; DAGISEL12-NEXT: ; %bb.1: ; %shader
152155
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
153156
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -156,7 +159,8 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
156159
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
157160
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
158161
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v10
159-
; DAGISEL12-NEXT: ; %bb.2: ; %tail
162+
; DAGISEL12-NEXT: .LBB1_2: ; %tail
163+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
160164
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
161165
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
162166
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -171,6 +175,7 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
171175
; GISEL10-NEXT: s_mov_b32 s6, s3
172176
; GISEL10-NEXT: s_mov_b32 s7, s4
173177
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
178+
; GISEL10-NEXT: s_cbranch_execz .LBB1_2
174179
; GISEL10-NEXT: ; %bb.1: ; %shader
175180
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
176181
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
@@ -179,7 +184,7 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
179184
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
180185
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
181186
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
182-
; GISEL10-NEXT: ; %bb.2: ; %tail
187+
; GISEL10-NEXT: .LBB1_2: ; %tail
183188
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
184189
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
185190
; GISEL10-NEXT: s_setpc_b64 s[6:7]
@@ -193,14 +198,15 @@ define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %cal
193198
; DAGISEL10-NEXT: s_mov_b32 s7, s4
194199
; DAGISEL10-NEXT: s_mov_b32 s6, s3
195200
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
201+
; DAGISEL10-NEXT: s_cbranch_execz .LBB1_2
196202
; DAGISEL10-NEXT: ; %bb.1: ; %shader
197203
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
198204
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
199205
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
200206
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
201207
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
202208
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
203-
; DAGISEL10-NEXT: ; %bb.2: ; %tail
209+
; DAGISEL10-NEXT: .LBB1_2: ; %tail
204210
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
205211
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
206212
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
@@ -240,6 +246,7 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
240246
; GISEL12-NEXT: s_mov_b32 s7, s4
241247
; GISEL12-NEXT: s_wait_alu 0xfffe
242248
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
249+
; GISEL12-NEXT: s_cbranch_execz .LBB2_2
243250
; GISEL12-NEXT: ; %bb.1: ; %shader
244251
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
245252
; GISEL12-NEXT: s_wait_alu 0xfffe
@@ -250,7 +257,8 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
250257
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
251258
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
252259
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v12
253-
; GISEL12-NEXT: ; %bb.2: ; %tail
260+
; GISEL12-NEXT: .LBB2_2: ; %tail
261+
; GISEL12-NEXT: s_wait_alu 0xfffe
254262
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
255263
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
256264
; GISEL12-NEXT: s_wait_alu 0xfffe
@@ -268,6 +276,7 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
268276
; DAGISEL12-NEXT: s_mov_b32 s6, s3
269277
; DAGISEL12-NEXT: s_wait_alu 0xfffe
270278
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
279+
; DAGISEL12-NEXT: s_cbranch_execz .LBB2_2
271280
; DAGISEL12-NEXT: ; %bb.1: ; %shader
272281
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
273282
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -276,7 +285,8 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
276285
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
277286
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
278287
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12
279-
; DAGISEL12-NEXT: ; %bb.2: ; %tail
288+
; DAGISEL12-NEXT: .LBB2_2: ; %tail
289+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
280290
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
281291
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
282292
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -289,6 +299,7 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
289299
; GISEL10-NEXT: s_mov_b32 s6, s3
290300
; GISEL10-NEXT: s_mov_b32 s7, s4
291301
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
302+
; GISEL10-NEXT: s_cbranch_execz .LBB2_2
292303
; GISEL10-NEXT: ; %bb.1: ; %shader
293304
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
294305
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
@@ -297,7 +308,7 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
297308
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
298309
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
299310
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
300-
; GISEL10-NEXT: ; %bb.2: ; %tail
311+
; GISEL10-NEXT: .LBB2_2: ; %tail
301312
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
302313
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
303314
; GISEL10-NEXT: s_setpc_b64 s[6:7]
@@ -309,14 +320,15 @@ define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %
309320
; DAGISEL10-NEXT: s_mov_b32 s7, s4
310321
; DAGISEL10-NEXT: s_mov_b32 s6, s3
311322
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
323+
; DAGISEL10-NEXT: s_cbranch_execz .LBB2_2
312324
; DAGISEL10-NEXT: ; %bb.1: ; %shader
313325
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
314326
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
315327
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
316328
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
317329
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
318330
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
319-
; DAGISEL10-NEXT: ; %bb.2: ; %tail
331+
; DAGISEL10-NEXT: .LBB2_2: ; %tail
320332
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
321333
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
322334
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
@@ -390,14 +402,16 @@ define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %call
390402
; GISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13
391403
; GISEL12-NEXT: s_wait_alu 0xfffe
392404
; GISEL12-NEXT: s_xor_b32 s3, exec_lo, s3
405+
; GISEL12-NEXT: s_cbranch_execz .LBB3_6
393406
; GISEL12-NEXT: ; %bb.5: ; %tail.else
394407
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
395408
; GISEL12-NEXT: v_mov_b32_e32 v0, 15
396409
; GISEL12-NEXT: s_wait_alu 0xfffe
397410
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
398411
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
399412
; GISEL12-NEXT: v_mov_b32_e32 v8, v0
400-
; GISEL12-NEXT: ; %bb.6: ; %Flow
413+
; GISEL12-NEXT: .LBB3_6: ; %Flow
414+
; GISEL12-NEXT: s_wait_alu 0xfffe
401415
; GISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3
402416
; GISEL12-NEXT: ; %bb.7: ; %tail.then
403417
; GISEL12-NEXT: s_mov_b32 s4, 44
@@ -501,12 +515,13 @@ define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %call
501515
; GISEL10-NEXT: ; implicit-def: $vgpr8
502516
; GISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13
503517
; GISEL10-NEXT: s_xor_b32 s3, exec_lo, s3
518+
; GISEL10-NEXT: s_cbranch_execz .LBB3_6
504519
; GISEL10-NEXT: ; %bb.5: ; %tail.else
505520
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
506521
; GISEL10-NEXT: v_mov_b32_e32 v0, 15
507522
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
508523
; GISEL10-NEXT: v_mov_b32_e32 v8, v0
509-
; GISEL10-NEXT: ; %bb.6: ; %Flow
524+
; GISEL10-NEXT: .LBB3_6: ; %Flow
510525
; GISEL10-NEXT: s_andn2_saveexec_b32 s3, s3
511526
; GISEL10-NEXT: ; %bb.7: ; %tail.then
512527
; GISEL10-NEXT: s_mov_b32 s4, 44

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.whole.wave-w64.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64
5757
; DAGISEL12-NEXT: s_mov_b32 s4, s3
5858
; DAGISEL12-NEXT: s_wait_alu 0xfffe
5959
; DAGISEL12-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
60+
; DAGISEL12-NEXT: s_cbranch_execz .LBB0_2
6061
; DAGISEL12-NEXT: ; %bb.1: ; %shader
6162
; DAGISEL12-NEXT: s_or_saveexec_b64 s[10:11], -1
6263
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -68,7 +69,8 @@ define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64
6869
; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v13
6970
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_3)
7071
; DAGISEL12-NEXT: v_mov_b32_e32 v12, s13
71-
; DAGISEL12-NEXT: ; %bb.2: ; %tail
72+
; DAGISEL12-NEXT: .LBB0_2: ; %tail
73+
; DAGISEL12-NEXT: s_wait_alu 0xfffe
7274
; DAGISEL12-NEXT: s_or_b64 exec, exec, s[8:9]
7375
; DAGISEL12-NEXT: s_mov_b64 exec, s[6:7]
7476
; DAGISEL12-NEXT: s_wait_alu 0xfffe
@@ -108,6 +110,7 @@ define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64
108110
; DAGISEL10-NEXT: s_mov_b32 s5, s4
109111
; DAGISEL10-NEXT: s_mov_b32 s4, s3
110112
; DAGISEL10-NEXT: s_and_saveexec_b64 s[8:9], s[10:11]
113+
; DAGISEL10-NEXT: s_cbranch_execz .LBB0_2
111114
; DAGISEL10-NEXT: ; %bb.1: ; %shader
112115
; DAGISEL10-NEXT: s_or_saveexec_b64 s[10:11], -1
113116
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v13, s[10:11]
@@ -116,7 +119,7 @@ define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i64
116119
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s12
117120
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v13
118121
; DAGISEL10-NEXT: v_mov_b32_e32 v12, s13
119-
; DAGISEL10-NEXT: ; %bb.2: ; %tail
122+
; DAGISEL10-NEXT: .LBB0_2: ; %tail
120123
; DAGISEL10-NEXT: s_or_b64 exec, exec, s[8:9]
121124
; DAGISEL10-NEXT: s_mov_b64 exec, s[6:7]
122125
; DAGISEL10-NEXT: s_setpc_b64 s[4:5]

llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
264264
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
265265
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
266266
; GFX9-O3-NEXT: s_and_saveexec_b64 s[34:35], vcc
267+
; GFX9-O3-NEXT: s_cbranch_execz .LBB1_2
267268
; GFX9-O3-NEXT: ; %bb.1: ; %if
268269
; GFX9-O3-NEXT: s_or_saveexec_b64 s[36:37], -1
269270
; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0
@@ -273,7 +274,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
273274
; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1
274275
; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37]
275276
; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1
276-
; GFX9-O3-NEXT: ; %bb.2: ; %merge
277+
; GFX9-O3-NEXT: .LBB1_2: ; %merge
277278
; GFX9-O3-NEXT: s_or_b64 exec, exec, s[34:35]
278279
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
279280
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc

llvm/test/CodeGen/AMDGPU/wwm-reserved.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
230230
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
231231
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
232232
; GFX9-O3-NEXT: s_and_saveexec_b64 s[4:5], vcc
233+
; GFX9-O3-NEXT: s_cbranch_execz .LBB1_2
233234
; GFX9-O3-NEXT: ; %bb.1: ; %if
234235
; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1
235236
; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0
@@ -239,7 +240,7 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
239240
; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1
240241
; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7]
241242
; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1
242-
; GFX9-O3-NEXT: ; %bb.2: ; %merge
243+
; GFX9-O3-NEXT: .LBB1_2: ; %merge
243244
; GFX9-O3-NEXT: s_or_b64 exec, exec, s[4:5]
244245
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
245246
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -1082,6 +1083,7 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
10821083
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
10831084
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
10841085
; GFX9-O3-NEXT: s_and_saveexec_b64 s[4:5], vcc
1086+
; GFX9-O3-NEXT: s_cbranch_execz .LBB8_2
10851087
; GFX9-O3-NEXT: ; %bb.1: ; %if
10861088
; GFX9-O3-NEXT: s_or_saveexec_b64 s[6:7], -1
10871089
; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0
@@ -1091,7 +1093,7 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
10911093
; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1
10921094
; GFX9-O3-NEXT: s_mov_b64 exec, s[6:7]
10931095
; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1
1094-
; GFX9-O3-NEXT: ; %bb.2: ; %merge
1096+
; GFX9-O3-NEXT: .LBB8_2: ; %merge
10951097
; GFX9-O3-NEXT: s_or_b64 exec, exec, s[4:5]
10961098
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
10971099
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc

0 commit comments

Comments
 (0)