Skip to content

Commit fb2d1bb

Browse files
[pre-commit] Update the test checks affected by adding a pass to llc
1 parent a21521a commit fb2d1bb

20 files changed

+931
-1960
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 86 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -89,17 +89,15 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
8989
; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
9090
; CHECK: ; %bb.0:
9191
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
92-
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
93-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
94-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
95-
; CHECK-NEXT: s_cbranch_scc1 .LBB7_2
96-
; CHECK-NEXT: ; %bb.1: ; %true
9792
; CHECK-NEXT: s_mov_b32 s0, 42
98-
; CHECK-NEXT: s_branch .LBB7_3
99-
; CHECK-NEXT: .LBB7_2: ; %false
93+
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
94+
; CHECK-NEXT: s_xor_b32 s2, vcc_lo, -1
95+
; CHECK-NEXT: s_and_saveexec_b32 s1, s2
96+
; CHECK-NEXT: ; %bb.1: ; %false
10097
; CHECK-NEXT: s_mov_b32 s0, 33
101-
; CHECK-NEXT: s_branch .LBB7_3
102-
; CHECK-NEXT: .LBB7_3:
98+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
99+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
100+
; CHECK-NEXT: ; return to shader part epilog
103101
%c = trunc i32 %v to i1
104102
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
105103
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -113,9 +111,9 @@ false:
113111
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
114112
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
115113
; CHECK: ; %bb.0:
116-
; CHECK-NEXT: s_and_b32 s0, 1, s0
117-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
118-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
114+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
115+
; CHECK-NEXT: s_and_b32 s0, s0, 1
116+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
119117
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
120118
; CHECK-NEXT: ; %bb.1: ; %true
121119
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -161,16 +159,17 @@ false:
161159
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
162160
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
163161
; CHECK: ; %bb.0:
164-
; CHECK-NEXT: s_and_b32 s0, 1, s0
165-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
162+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
163+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
164+
; CHECK-NEXT: s_and_b32 s0, s0, 1
166165
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
167-
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
168-
; CHECK-NEXT: ; %bb.1: ; %false
169-
; CHECK-NEXT: s_mov_b32 s0, 33
170-
; CHECK-NEXT: s_branch .LBB10_3
171-
; CHECK-NEXT: .LBB10_2: ; %true
166+
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
167+
; CHECK-NEXT: ; %bb.1: ; %true
172168
; CHECK-NEXT: s_mov_b32 s0, 42
173169
; CHECK-NEXT: s_branch .LBB10_3
170+
; CHECK-NEXT: .LBB10_2: ; %false
171+
; CHECK-NEXT: s_mov_b32 s0, 33
172+
; CHECK-NEXT: s_branch .LBB10_3
174173
; CHECK-NEXT: .LBB10_3:
175174
%c = trunc i32 %v to i1
176175
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -183,18 +182,27 @@ false:
183182
}
184183

185184
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
186-
; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
187-
; CHECK: ; %bb.0:
188-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
189-
; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
190-
; CHECK-NEXT: s_cbranch_scc1 .LBB11_2
191-
; CHECK-NEXT: ; %bb.1: ; %true
192-
; CHECK-NEXT: s_mov_b32 s0, 42
193-
; CHECK-NEXT: s_branch .LBB11_3
194-
; CHECK-NEXT: .LBB11_2: ; %false
195-
; CHECK-NEXT: s_mov_b32 s0, 33
196-
; CHECK-NEXT: s_branch .LBB11_3
197-
; CHECK-NEXT: .LBB11_3:
185+
; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
186+
; GFX10: ; %bb.0:
187+
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 12, v0
188+
; GFX10-NEXT: s_mov_b32 s0, 42
189+
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
190+
; GFX10-NEXT: ; %bb.1: ; %false
191+
; GFX10-NEXT: s_mov_b32 s0, 33
192+
; GFX10-NEXT: ; %bb.2: ; %UnifiedReturnBlock
193+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
194+
; GFX10-NEXT: ; return to shader part epilog
195+
;
196+
; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
197+
; GFX11: ; %bb.0:
198+
; GFX11-NEXT: s_mov_b32 s0, 42
199+
; GFX11-NEXT: s_mov_b32 s1, exec_lo
200+
; GFX11-NEXT: v_cmpx_le_u32_e32 12, v0
201+
; GFX11-NEXT: ; %bb.1: ; %false
202+
; GFX11-NEXT: s_mov_b32 s0, 33
203+
; GFX11-NEXT: ; %bb.2: ; %UnifiedReturnBlock
204+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1
205+
; GFX11-NEXT: ; return to shader part epilog
198206
%c = icmp ult i32 %v, 12
199207
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
200208
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -208,11 +216,7 @@ false:
208216
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
209217
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
210218
; CHECK: ; %bb.0:
211-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
212-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
213-
; CHECK-NEXT: s_and_b32 s0, 1, s0
214-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
215-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
219+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
216220
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
217221
; CHECK-NEXT: ; %bb.1: ; %true
218222
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -232,18 +236,27 @@ false:
232236
}
233237

234238
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
235-
; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
236-
; CHECK: ; %bb.0:
237-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
238-
; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
239-
; CHECK-NEXT: s_cbranch_scc0 .LBB13_2
240-
; CHECK-NEXT: ; %bb.1: ; %false
241-
; CHECK-NEXT: s_mov_b32 s0, 33
242-
; CHECK-NEXT: s_branch .LBB13_3
243-
; CHECK-NEXT: .LBB13_2: ; %true
244-
; CHECK-NEXT: s_mov_b32 s0, 42
245-
; CHECK-NEXT: s_branch .LBB13_3
246-
; CHECK-NEXT: .LBB13_3:
239+
; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
240+
; GFX10: ; %bb.0:
241+
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
242+
; GFX10-NEXT: s_mov_b32 s0, 42
243+
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
244+
; GFX10-NEXT: ; %bb.1: ; %false
245+
; GFX10-NEXT: s_mov_b32 s0, 33
246+
; GFX10-NEXT: ; %bb.2: ; %UnifiedReturnBlock
247+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
248+
; GFX10-NEXT: ; return to shader part epilog
249+
;
250+
; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
251+
; GFX11: ; %bb.0:
252+
; GFX11-NEXT: s_mov_b32 s0, 42
253+
; GFX11-NEXT: s_mov_b32 s1, exec_lo
254+
; GFX11-NEXT: v_cmpx_gt_u32_e32 12, v0
255+
; GFX11-NEXT: ; %bb.1: ; %false
256+
; GFX11-NEXT: s_mov_b32 s0, 33
257+
; GFX11-NEXT: ; %bb.2: ; %UnifiedReturnBlock
258+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1
259+
; GFX11-NEXT: ; return to shader part epilog
247260
%c = icmp ult i32 %v, 12
248261
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
249262
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -258,17 +271,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
258271
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
259272
; CHECK: ; %bb.0:
260273
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
261-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
262-
; CHECK-NEXT: s_and_b32 s0, 1, s0
263-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
264-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
265-
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
266-
; CHECK-NEXT: ; %bb.1: ; %false
267-
; CHECK-NEXT: s_mov_b32 s0, 33
268-
; CHECK-NEXT: s_branch .LBB14_3
269-
; CHECK-NEXT: .LBB14_2: ; %true
274+
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
275+
; CHECK-NEXT: ; %bb.1: ; %true
270276
; CHECK-NEXT: s_mov_b32 s0, 42
271277
; CHECK-NEXT: s_branch .LBB14_3
278+
; CHECK-NEXT: .LBB14_2: ; %false
279+
; CHECK-NEXT: s_mov_b32 s0, 33
280+
; CHECK-NEXT: s_branch .LBB14_3
272281
; CHECK-NEXT: .LBB14_3:
273282
%c = icmp ult i32 %v, 12
274283
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -283,18 +292,16 @@ false:
283292
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
284293
; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
285294
; CHECK: ; %bb.0:
286-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
287-
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
288-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
289-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
290-
; CHECK-NEXT: s_cbranch_scc1 .LBB15_2
291-
; CHECK-NEXT: ; %bb.1: ; %true
295+
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, 12, v0
296+
; CHECK-NEXT: v_cmp_ge_u32_e64 s0, 34, v1
297+
; CHECK-NEXT: s_or_b32 s2, vcc_lo, s0
292298
; CHECK-NEXT: s_mov_b32 s0, 42
293-
; CHECK-NEXT: s_branch .LBB15_3
294-
; CHECK-NEXT: .LBB15_2: ; %false
299+
; CHECK-NEXT: s_and_saveexec_b32 s1, s2
300+
; CHECK-NEXT: ; %bb.1: ; %false
295301
; CHECK-NEXT: s_mov_b32 s0, 33
296-
; CHECK-NEXT: s_branch .LBB15_3
297-
; CHECK-NEXT: .LBB15_3:
302+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
303+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
304+
; CHECK-NEXT: ; return to shader part epilog
298305
%v1c = icmp ult i32 %v1, 12
299306
%v2c = icmp ugt i32 %v2, 34
300307
%c = and i1 %v1c, %v2c
@@ -310,14 +317,12 @@ false:
310317
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
311318
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
312319
; CHECK: ; %bb.0:
313-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
320+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
314321
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
315-
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
322+
; CHECK-NEXT: s_cmp_le_u32 s1, 34
316323
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
317-
; CHECK-NEXT: s_and_b32 s0, s0, s1
318-
; CHECK-NEXT: s_and_b32 s0, 1, s0
319-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
320-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
324+
; CHECK-NEXT: s_or_b32 s0, s0, s1
325+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
321326
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
322327
; CHECK-NEXT: ; %bb.1: ; %true
323328
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -347,11 +352,9 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
347352
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
348353
; CHECK-NEXT: ; %bb.1: ; %false
349354
; CHECK-NEXT: s_mov_b32 s0, 33
350-
; CHECK-NEXT: s_branch .LBB17_3
351-
; CHECK-NEXT: .LBB17_2: ; %true
352-
; CHECK-NEXT: s_mov_b32 s0, 42
353-
; CHECK-NEXT: s_branch .LBB17_3
354-
; CHECK-NEXT: .LBB17_3:
355+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
356+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
357+
; CHECK-NEXT: ; return to shader part epilog
355358
%v1c = icmp ult i32 %v1, 12
356359
%v2c = icmp ugt i32 %v2, 34
357360
%c = and i1 %v1c, %v2c
@@ -365,23 +368,15 @@ false:
365368
}
366369

367370
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg %v2) {
368-
; CHECK-LABEL: branch_uniform_ballot_eq_zero_and:
369371
; CHECK: ; %bb.0:
370372
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
371-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
372-
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
373-
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
374-
; CHECK-NEXT: s_and_b32 s0, s0, s1
375-
; CHECK-NEXT: s_and_b32 s0, 1, s0
376-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
377-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
378-
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
379-
; CHECK-NEXT: ; %bb.1: ; %false
380-
; CHECK-NEXT: s_mov_b32 s0, 33
381-
; CHECK-NEXT: s_branch .LBB18_3
382-
; CHECK-NEXT: .LBB18_2: ; %true
373+
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
374+
; CHECK-NEXT: ; %bb.1: ; %true
383375
; CHECK-NEXT: s_mov_b32 s0, 42
384376
; CHECK-NEXT: s_branch .LBB18_3
377+
; CHECK-NEXT: .LBB18_2: ; %false
378+
; CHECK-NEXT: s_mov_b32 s0, 33
379+
; CHECK-NEXT: s_branch .LBB18_3
385380
; CHECK-NEXT: .LBB18_3:
386381
%v1c = icmp ult i32 %v1, 12
387382
%v2c = icmp ugt i32 %v2, 34

0 commit comments

Comments
 (0)