Skip to content

Commit b6aba3e

Browse files
[pre-commit] Update the test checks affected by adding the pass to llc
1 parent 4030b27 commit b6aba3e

20 files changed

+962
-1977
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 112 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -89,17 +89,15 @@ define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_non_compare(i32 %v) {
8989
; CHECK-LABEL: branch_divergent_ballot_ne_zero_non_compare:
9090
; CHECK: ; %bb.0:
9191
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
92-
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
93-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
94-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
95-
; CHECK-NEXT: s_cbranch_scc1 .LBB7_2
96-
; CHECK-NEXT: ; %bb.1: ; %true
9792
; CHECK-NEXT: s_mov_b32 s0, 42
98-
; CHECK-NEXT: s_branch .LBB7_3
99-
; CHECK-NEXT: .LBB7_2: ; %false
93+
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
94+
; CHECK-NEXT: s_xor_b32 s2, vcc_lo, -1
95+
; CHECK-NEXT: s_and_saveexec_b32 s1, s2
96+
; CHECK-NEXT: ; %bb.1: ; %false
10097
; CHECK-NEXT: s_mov_b32 s0, 33
101-
; CHECK-NEXT: s_branch .LBB7_3
102-
; CHECK-NEXT: .LBB7_3:
98+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
99+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
100+
; CHECK-NEXT: ; return to shader part epilog
103101
%c = trunc i32 %v to i1
104102
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
105103
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -113,9 +111,9 @@ false:
113111
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
114112
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
115113
; CHECK: ; %bb.0:
116-
; CHECK-NEXT: s_and_b32 s0, 1, s0
117-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
118-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
114+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
115+
; CHECK-NEXT: s_and_b32 s0, s0, 1
116+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
119117
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
120118
; CHECK-NEXT: ; %bb.1: ; %true
121119
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -135,20 +133,29 @@ false:
135133
}
136134

137135
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
138-
; CHECK-LABEL: branch_divergent_ballot_eq_zero_non_compare:
139-
; CHECK: ; %bb.0:
140-
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
141-
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
142-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
143-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
144-
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
145-
; CHECK-NEXT: ; %bb.1: ; %false
146-
; CHECK-NEXT: s_mov_b32 s0, 33
147-
; CHECK-NEXT: s_branch .LBB9_3
148-
; CHECK-NEXT: .LBB9_2: ; %true
149-
; CHECK-NEXT: s_mov_b32 s0, 42
150-
; CHECK-NEXT: s_branch .LBB9_3
151-
; CHECK-NEXT: .LBB9_3:
136+
; GFX10-LABEL: branch_divergent_ballot_eq_zero_non_compare:
137+
; GFX10: ; %bb.0:
138+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
139+
; GFX10-NEXT: s_mov_b32 s0, 42
140+
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
141+
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
142+
; GFX10-NEXT: ; %bb.1: ; %false
143+
; GFX10-NEXT: s_mov_b32 s0, 33
144+
; GFX10-NEXT: ; %bb.2: ; %UnifiedReturnBlock
145+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
146+
; GFX10-NEXT: ; return to shader part epilog
147+
;
148+
; GFX11-LABEL: branch_divergent_ballot_eq_zero_non_compare:
149+
; GFX11: ; %bb.0:
150+
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
151+
; GFX11-NEXT: s_mov_b32 s0, 42
152+
; GFX11-NEXT: s_mov_b32 s1, exec_lo
153+
; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v0
154+
; GFX11-NEXT: ; %bb.1: ; %false
155+
; GFX11-NEXT: s_mov_b32 s0, 33
156+
; GFX11-NEXT: ; %bb.2: ; %UnifiedReturnBlock
157+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1
158+
; GFX11-NEXT: ; return to shader part epilog
152159
%c = trunc i32 %v to i1
153160
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
154161
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -162,16 +169,17 @@ false:
162169
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
163170
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
164171
; CHECK: ; %bb.0:
165-
; CHECK-NEXT: s_and_b32 s0, 1, s0
166-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
172+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
173+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
174+
; CHECK-NEXT: s_and_b32 s0, s0, 1
167175
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
168-
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
169-
; CHECK-NEXT: ; %bb.1: ; %false
170-
; CHECK-NEXT: s_mov_b32 s0, 33
171-
; CHECK-NEXT: s_branch .LBB10_3
172-
; CHECK-NEXT: .LBB10_2: ; %true
176+
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
177+
; CHECK-NEXT: ; %bb.1: ; %true
173178
; CHECK-NEXT: s_mov_b32 s0, 42
174179
; CHECK-NEXT: s_branch .LBB10_3
180+
; CHECK-NEXT: .LBB10_2: ; %false
181+
; CHECK-NEXT: s_mov_b32 s0, 33
182+
; CHECK-NEXT: s_branch .LBB10_3
175183
; CHECK-NEXT: .LBB10_3:
176184
%c = trunc i32 %v to i1
177185
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -184,18 +192,27 @@ false:
184192
}
185193

186194
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_compare(i32 %v) {
187-
; CHECK-LABEL: branch_divergent_ballot_ne_zero_compare:
188-
; CHECK: ; %bb.0:
189-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
190-
; CHECK-NEXT: s_cmp_eq_u32 vcc_lo, 0
191-
; CHECK-NEXT: s_cbranch_scc1 .LBB11_2
192-
; CHECK-NEXT: ; %bb.1: ; %true
193-
; CHECK-NEXT: s_mov_b32 s0, 42
194-
; CHECK-NEXT: s_branch .LBB11_3
195-
; CHECK-NEXT: .LBB11_2: ; %false
196-
; CHECK-NEXT: s_mov_b32 s0, 33
197-
; CHECK-NEXT: s_branch .LBB11_3
198-
; CHECK-NEXT: .LBB11_3:
195+
; GFX10-LABEL: branch_divergent_ballot_ne_zero_compare:
196+
; GFX10: ; %bb.0:
197+
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 12, v0
198+
; GFX10-NEXT: s_mov_b32 s0, 42
199+
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
200+
; GFX10-NEXT: ; %bb.1: ; %false
201+
; GFX10-NEXT: s_mov_b32 s0, 33
202+
; GFX10-NEXT: ; %bb.2: ; %UnifiedReturnBlock
203+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
204+
; GFX10-NEXT: ; return to shader part epilog
205+
;
206+
; GFX11-LABEL: branch_divergent_ballot_ne_zero_compare:
207+
; GFX11: ; %bb.0:
208+
; GFX11-NEXT: s_mov_b32 s0, 42
209+
; GFX11-NEXT: s_mov_b32 s1, exec_lo
210+
; GFX11-NEXT: v_cmpx_le_u32_e32 12, v0
211+
; GFX11-NEXT: ; %bb.1: ; %false
212+
; GFX11-NEXT: s_mov_b32 s0, 33
213+
; GFX11-NEXT: ; %bb.2: ; %UnifiedReturnBlock
214+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1
215+
; GFX11-NEXT: ; return to shader part epilog
199216
%c = icmp ult i32 %v, 12
200217
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
201218
%ballot_ne_zero = icmp ne i32 %ballot, 0
@@ -209,11 +226,7 @@ false:
209226
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
210227
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
211228
; CHECK: ; %bb.0:
212-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
213-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
214-
; CHECK-NEXT: s_and_b32 s0, 1, s0
215-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
216-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
229+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
217230
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
218231
; CHECK-NEXT: ; %bb.1: ; %true
219232
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -233,18 +246,27 @@ false:
233246
}
234247

235248
define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_compare(i32 %v) {
236-
; CHECK-LABEL: branch_divergent_ballot_eq_zero_compare:
237-
; CHECK: ; %bb.0:
238-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
239-
; CHECK-NEXT: s_cmp_lg_u32 vcc_lo, 0
240-
; CHECK-NEXT: s_cbranch_scc0 .LBB13_2
241-
; CHECK-NEXT: ; %bb.1: ; %false
242-
; CHECK-NEXT: s_mov_b32 s0, 33
243-
; CHECK-NEXT: s_branch .LBB13_3
244-
; CHECK-NEXT: .LBB13_2: ; %true
245-
; CHECK-NEXT: s_mov_b32 s0, 42
246-
; CHECK-NEXT: s_branch .LBB13_3
247-
; CHECK-NEXT: .LBB13_3:
249+
; GFX10-LABEL: branch_divergent_ballot_eq_zero_compare:
250+
; GFX10: ; %bb.0:
251+
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
252+
; GFX10-NEXT: s_mov_b32 s0, 42
253+
; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo
254+
; GFX10-NEXT: ; %bb.1: ; %false
255+
; GFX10-NEXT: s_mov_b32 s0, 33
256+
; GFX10-NEXT: ; %bb.2: ; %UnifiedReturnBlock
257+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1
258+
; GFX10-NEXT: ; return to shader part epilog
259+
;
260+
; GFX11-LABEL: branch_divergent_ballot_eq_zero_compare:
261+
; GFX11: ; %bb.0:
262+
; GFX11-NEXT: s_mov_b32 s0, 42
263+
; GFX11-NEXT: s_mov_b32 s1, exec_lo
264+
; GFX11-NEXT: v_cmpx_gt_u32_e32 12, v0
265+
; GFX11-NEXT: ; %bb.1: ; %false
266+
; GFX11-NEXT: s_mov_b32 s0, 33
267+
; GFX11-NEXT: ; %bb.2: ; %UnifiedReturnBlock
268+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1
269+
; GFX11-NEXT: ; return to shader part epilog
248270
%c = icmp ult i32 %v, 12
249271
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
250272
%ballot_eq_zero = icmp eq i32 %ballot, 0
@@ -259,17 +281,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
259281
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
260282
; CHECK: ; %bb.0:
261283
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
262-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
263-
; CHECK-NEXT: s_and_b32 s0, 1, s0
264-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
265-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
266-
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
267-
; CHECK-NEXT: ; %bb.1: ; %false
268-
; CHECK-NEXT: s_mov_b32 s0, 33
269-
; CHECK-NEXT: s_branch .LBB14_3
270-
; CHECK-NEXT: .LBB14_2: ; %true
284+
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
285+
; CHECK-NEXT: ; %bb.1: ; %true
271286
; CHECK-NEXT: s_mov_b32 s0, 42
272287
; CHECK-NEXT: s_branch .LBB14_3
288+
; CHECK-NEXT: .LBB14_2: ; %false
289+
; CHECK-NEXT: s_mov_b32 s0, 33
290+
; CHECK-NEXT: s_branch .LBB14_3
273291
; CHECK-NEXT: .LBB14_3:
274292
%c = icmp ult i32 %v, 12
275293
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -284,18 +302,16 @@ false:
284302
define amdgpu_cs i32 @branch_divergent_ballot_ne_zero_and(i32 %v1, i32 %v2) {
285303
; CHECK-LABEL: branch_divergent_ballot_ne_zero_and:
286304
; CHECK: ; %bb.0:
287-
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
288-
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
289-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
290-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
291-
; CHECK-NEXT: s_cbranch_scc1 .LBB15_2
292-
; CHECK-NEXT: ; %bb.1: ; %true
305+
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, 12, v0
306+
; CHECK-NEXT: v_cmp_ge_u32_e64 s0, 34, v1
307+
; CHECK-NEXT: s_or_b32 s2, vcc_lo, s0
293308
; CHECK-NEXT: s_mov_b32 s0, 42
294-
; CHECK-NEXT: s_branch .LBB15_3
295-
; CHECK-NEXT: .LBB15_2: ; %false
309+
; CHECK-NEXT: s_and_saveexec_b32 s1, s2
310+
; CHECK-NEXT: ; %bb.1: ; %false
296311
; CHECK-NEXT: s_mov_b32 s0, 33
297-
; CHECK-NEXT: s_branch .LBB15_3
298-
; CHECK-NEXT: .LBB15_3:
312+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
313+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
314+
; CHECK-NEXT: ; return to shader part epilog
299315
%v1c = icmp ult i32 %v1, 12
300316
%v2c = icmp ugt i32 %v2, 34
301317
%c = and i1 %v1c, %v2c
@@ -311,14 +327,12 @@ false:
311327
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
312328
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
313329
; CHECK: ; %bb.0:
314-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
330+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
315331
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
316-
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
332+
; CHECK-NEXT: s_cmp_le_u32 s1, 34
317333
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
318-
; CHECK-NEXT: s_and_b32 s0, s0, s1
319-
; CHECK-NEXT: s_and_b32 s0, 1, s0
320-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
321-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
334+
; CHECK-NEXT: s_or_b32 s0, s0, s1
335+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
322336
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
323337
; CHECK-NEXT: ; %bb.1: ; %true
324338
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -344,16 +358,14 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
344358
; CHECK: ; %bb.0:
345359
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
346360
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
347-
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
348-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
349-
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
361+
; CHECK-NEXT: s_and_b32 s2, vcc_lo, s0
362+
; CHECK-NEXT: s_mov_b32 s0, 42
363+
; CHECK-NEXT: s_and_saveexec_b32 s1, s2
350364
; CHECK-NEXT: ; %bb.1: ; %false
351365
; CHECK-NEXT: s_mov_b32 s0, 33
352-
; CHECK-NEXT: s_branch .LBB17_3
353-
; CHECK-NEXT: .LBB17_2: ; %true
354-
; CHECK-NEXT: s_mov_b32 s0, 42
355-
; CHECK-NEXT: s_branch .LBB17_3
356-
; CHECK-NEXT: .LBB17_3:
366+
; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
367+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
368+
; CHECK-NEXT: ; return to shader part epilog
357369
%v1c = icmp ult i32 %v1, 12
358370
%v2c = icmp ugt i32 %v2, 34
359371
%c = and i1 %v1c, %v2c
@@ -374,16 +386,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
374386
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
375387
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
376388
; CHECK-NEXT: s_and_b32 s0, s0, s1
377-
; CHECK-NEXT: s_and_b32 s0, 1, s0
378-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
379389
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
380-
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
381-
; CHECK-NEXT: ; %bb.1: ; %false
382-
; CHECK-NEXT: s_mov_b32 s0, 33
383-
; CHECK-NEXT: s_branch .LBB18_3
384-
; CHECK-NEXT: .LBB18_2: ; %true
390+
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
391+
; CHECK-NEXT: ; %bb.1: ; %true
385392
; CHECK-NEXT: s_mov_b32 s0, 42
386393
; CHECK-NEXT: s_branch .LBB18_3
394+
; CHECK-NEXT: .LBB18_2: ; %false
395+
; CHECK-NEXT: s_mov_b32 s0, 33
396+
; CHECK-NEXT: s_branch .LBB18_3
387397
; CHECK-NEXT: .LBB18_3:
388398
%v1c = icmp ult i32 %v1, 12
389399
%v2c = icmp ugt i32 %v2, 34

0 commit comments

Comments
 (0)