Skip to content

Commit 04822ec

Browse files
committed
Revert "[AMDGPU] Enable "amdgpu-uniform-intrinsic-combine" pass in pipeline. (llvm#162819)"
This reverts commit 4d7093b.
1 parent 41cdd0b commit 04822ec

19 files changed, with 439 additions (+439) and 523 deletions (-523).

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1347,9 +1347,6 @@ void AMDGPUPassConfig::addIRPasses() {
13471347
isPassEnabled(EnableImageIntrinsicOptimizer))
13481348
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
13491349

1350-
if (EnableUniformIntrinsicCombine)
1351-
addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());
1352-
13531350
// This can be disabled by passing ::Disable here or on the command line
13541351
// with --expand-variadics-override=disable.
13551352
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
@@ -2101,8 +2098,6 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
21012098
if (isPassEnabled(EnableImageIntrinsicOptimizer))
21022099
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
21032100

2104-
if (EnableUniformIntrinsicCombine)
2105-
addPass(AMDGPUUniformIntrinsicCombinePass());
21062101
// This can be disabled by passing ::Disable here or on the command line
21072102
// with --expand-variadics-override=disable.
21082103
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 37 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -113,9 +113,9 @@ false:
113113
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
114114
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
115115
; CHECK: ; %bb.0:
116-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
117-
; CHECK-NEXT: s_and_b32 s0, s0, 1
118-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
116+
; CHECK-NEXT: s_and_b32 s0, 1, s0
117+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
118+
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
119119
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
120120
; CHECK-NEXT: ; %bb.1: ; %true
121121
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -161,17 +161,16 @@ false:
161161
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
162162
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
163163
; CHECK: ; %bb.0:
164-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
165-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
166-
; CHECK-NEXT: s_and_b32 s0, s0, 1
164+
; CHECK-NEXT: s_and_b32 s0, 1, s0
165+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
167166
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
168-
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
169-
; CHECK-NEXT: ; %bb.1: ; %true
170-
; CHECK-NEXT: s_mov_b32 s0, 42
171-
; CHECK-NEXT: s_branch .LBB10_3
172-
; CHECK-NEXT: .LBB10_2: ; %false
167+
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
168+
; CHECK-NEXT: ; %bb.1: ; %false
173169
; CHECK-NEXT: s_mov_b32 s0, 33
174170
; CHECK-NEXT: s_branch .LBB10_3
171+
; CHECK-NEXT: .LBB10_2: ; %true
172+
; CHECK-NEXT: s_mov_b32 s0, 42
173+
; CHECK-NEXT: s_branch .LBB10_3
175174
; CHECK-NEXT: .LBB10_3:
176175
%c = trunc i32 %v to i1
177176
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -209,7 +208,11 @@ false:
209208
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
210209
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
211210
; CHECK: ; %bb.0:
212-
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
211+
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
212+
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
213+
; CHECK-NEXT: s_and_b32 s0, 1, s0
214+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
215+
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
213216
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
214217
; CHECK-NEXT: ; %bb.1: ; %true
215218
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -255,13 +258,17 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
255258
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
256259
; CHECK: ; %bb.0:
257260
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
258-
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
259-
; CHECK-NEXT: ; %bb.1: ; %true
260-
; CHECK-NEXT: s_mov_b32 s0, 42
261-
; CHECK-NEXT: s_branch .LBB14_3
262-
; CHECK-NEXT: .LBB14_2: ; %false
261+
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
262+
; CHECK-NEXT: s_and_b32 s0, 1, s0
263+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
264+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
265+
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
266+
; CHECK-NEXT: ; %bb.1: ; %false
263267
; CHECK-NEXT: s_mov_b32 s0, 33
264268
; CHECK-NEXT: s_branch .LBB14_3
269+
; CHECK-NEXT: .LBB14_2: ; %true
270+
; CHECK-NEXT: s_mov_b32 s0, 42
271+
; CHECK-NEXT: s_branch .LBB14_3
265272
; CHECK-NEXT: .LBB14_3:
266273
%c = icmp ult i32 %v, 12
267274
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -303,12 +310,14 @@ false:
303310
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
304311
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
305312
; CHECK: ; %bb.0:
306-
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
313+
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
307314
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
308-
; CHECK-NEXT: s_cmp_le_u32 s1, 34
315+
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
309316
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
310-
; CHECK-NEXT: s_or_b32 s0, s0, s1
311-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
317+
; CHECK-NEXT: s_and_b32 s0, s0, s1
318+
; CHECK-NEXT: s_and_b32 s0, 1, s0
319+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
320+
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
312321
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
313322
; CHECK-NEXT: ; %bb.1: ; %true
314323
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -363,14 +372,16 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
363372
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
364373
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
365374
; CHECK-NEXT: s_and_b32 s0, s0, s1
375+
; CHECK-NEXT: s_and_b32 s0, 1, s0
376+
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
366377
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
367-
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
368-
; CHECK-NEXT: ; %bb.1: ; %true
369-
; CHECK-NEXT: s_mov_b32 s0, 42
370-
; CHECK-NEXT: s_branch .LBB18_3
371-
; CHECK-NEXT: .LBB18_2: ; %false
378+
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
379+
; CHECK-NEXT: ; %bb.1: ; %false
372380
; CHECK-NEXT: s_mov_b32 s0, 33
373381
; CHECK-NEXT: s_branch .LBB18_3
382+
; CHECK-NEXT: .LBB18_2: ; %true
383+
; CHECK-NEXT: s_mov_b32 s0, 42
384+
; CHECK-NEXT: s_branch .LBB18_3
374385
; CHECK-NEXT: .LBB18_3:
375386
%v1c = icmp ult i32 %v1, 12
376387
%v2c = icmp ugt i32 %v2, 34

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll

Lines changed: 39 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -116,9 +116,9 @@ false:
116116
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
117117
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
118118
; CHECK: ; %bb.0:
119-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
120-
; CHECK-NEXT: s_and_b32 s0, s0, 1
121-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
119+
; CHECK-NEXT: s_and_b32 s0, 1, s0
120+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
121+
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
122122
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
123123
; CHECK-NEXT: ; %bb.1: ; %true
124124
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,17 +164,16 @@ false:
164164
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
165165
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
166166
; CHECK: ; %bb.0:
167-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
168-
; CHECK-NEXT: s_xor_b32 s0, s0, 1
169-
; CHECK-NEXT: s_and_b32 s0, s0, 1
170-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
171-
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
172-
; CHECK-NEXT: ; %bb.1: ; %true
173-
; CHECK-NEXT: s_mov_b32 s0, 42
174-
; CHECK-NEXT: s_branch .LBB10_3
175-
; CHECK-NEXT: .LBB10_2: ; %false
167+
; CHECK-NEXT: s_and_b32 s0, 1, s0
168+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
169+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
170+
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
171+
; CHECK-NEXT: ; %bb.1: ; %false
176172
; CHECK-NEXT: s_mov_b32 s0, 33
177173
; CHECK-NEXT: s_branch .LBB10_3
174+
; CHECK-NEXT: .LBB10_2: ; %true
175+
; CHECK-NEXT: s_mov_b32 s0, 42
176+
; CHECK-NEXT: s_branch .LBB10_3
178177
; CHECK-NEXT: .LBB10_3:
179178
%c = trunc i32 %v to i1
180179
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -212,7 +211,11 @@ false:
212211
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
213212
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
214213
; CHECK: ; %bb.0:
215-
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
214+
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
215+
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
216+
; CHECK-NEXT: s_and_b32 s0, 1, s0
217+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
218+
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
216219
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
217220
; CHECK-NEXT: ; %bb.1: ; %true
218221
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -258,13 +261,17 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
258261
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
259262
; CHECK: ; %bb.0:
260263
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
261-
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
262-
; CHECK-NEXT: ; %bb.1: ; %true
263-
; CHECK-NEXT: s_mov_b32 s0, 42
264-
; CHECK-NEXT: s_branch .LBB14_3
265-
; CHECK-NEXT: .LBB14_2: ; %false
264+
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
265+
; CHECK-NEXT: s_and_b32 s0, 1, s0
266+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
267+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
268+
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
269+
; CHECK-NEXT: ; %bb.1: ; %false
266270
; CHECK-NEXT: s_mov_b32 s0, 33
267271
; CHECK-NEXT: s_branch .LBB14_3
272+
; CHECK-NEXT: .LBB14_2: ; %true
273+
; CHECK-NEXT: s_mov_b32 s0, 42
274+
; CHECK-NEXT: s_branch .LBB14_3
268275
; CHECK-NEXT: .LBB14_3:
269276
%c = icmp ult i32 %v, 12
270277
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -306,12 +313,14 @@ false:
306313
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
307314
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
308315
; CHECK: ; %bb.0:
309-
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
316+
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
310317
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
311-
; CHECK-NEXT: s_cmp_le_u32 s1, 34
318+
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
312319
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
313-
; CHECK-NEXT: s_or_b32 s0, s0, s1
314-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
320+
; CHECK-NEXT: s_and_b32 s0, s0, s1
321+
; CHECK-NEXT: s_and_b32 s0, 1, s0
322+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
323+
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
315324
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
316325
; CHECK-NEXT: ; %bb.1: ; %true
317326
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -366,14 +375,16 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
366375
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
367376
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
368377
; CHECK-NEXT: s_and_b32 s0, s0, s1
369-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
370-
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
371-
; CHECK-NEXT: ; %bb.1: ; %true
372-
; CHECK-NEXT: s_mov_b32 s0, 42
373-
; CHECK-NEXT: s_branch .LBB18_3
374-
; CHECK-NEXT: .LBB18_2: ; %false
378+
; CHECK-NEXT: s_and_b32 s0, 1, s0
379+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
380+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
381+
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
382+
; CHECK-NEXT: ; %bb.1: ; %false
375383
; CHECK-NEXT: s_mov_b32 s0, 33
376384
; CHECK-NEXT: s_branch .LBB18_3
385+
; CHECK-NEXT: .LBB18_2: ; %true
386+
; CHECK-NEXT: s_mov_b32 s0, 42
387+
; CHECK-NEXT: s_branch .LBB18_3
377388
; CHECK-NEXT: .LBB18_3:
378389
%v1c = icmp ult i32 %v1, 12
379390
%v2c = icmp ugt i32 %v2, 34

0 commit comments

Comments
 (0)