Skip to content

Commit 4d7093b

Browse files
[AMDGPU] Enable "amdgpu-uniform-intrinsic-combine" pass in pipeline. (#162819)
This PR enables the AMDGPUUniformIntrinsicCombine pass in the llc pipeline. It also introduces the "amdgpu-uniform-intrinsic-combine" command-line flag to enable/disable the pass. See PR #116953.
1 parent e0d9c9c commit 4d7093b

20 files changed

+524
-440
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,9 @@ void AMDGPUPassConfig::addIRPasses() {
13151315
isPassEnabled(EnableImageIntrinsicOptimizer))
13161316
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
13171317

1318+
if (EnableUniformIntrinsicCombine)
1319+
addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());
1320+
13181321
// This can be disabled by passing ::Disable here or on the command line
13191322
// with --expand-variadics-override=disable.
13201323
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
@@ -2066,6 +2069,8 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
20662069
if (isPassEnabled(EnableImageIntrinsicOptimizer))
20672070
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
20682071

2072+
if (EnableUniformIntrinsicCombine)
2073+
addPass(AMDGPUUniformIntrinsicCombinePass());
20692074
// This can be disabled by passing ::Disable here or on the command line
20702075
// with --expand-variadics-override=disable.
20712076
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));

llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,4 +188,4 @@ INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
188188

189189
FunctionPass *llvm::createAMDGPUUniformIntrinsicCombineLegacyPass() {
190190
return new AMDGPUUniformIntrinsicCombineLegacy();
191-
}
191+
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,9 @@ false:
113113
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
114114
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
115115
; CHECK: ; %bb.0:
116-
; CHECK-NEXT: s_and_b32 s0, 1, s0
117-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
118-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
116+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
117+
; CHECK-NEXT: s_and_b32 s0, s0, 1
118+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
119119
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
120120
; CHECK-NEXT: ; %bb.1: ; %true
121121
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -161,16 +161,17 @@ false:
161161
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
162162
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
163163
; CHECK: ; %bb.0:
164-
; CHECK-NEXT: s_and_b32 s0, 1, s0
165-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
164+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
165+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
166+
; CHECK-NEXT: s_and_b32 s0, s0, 1
166167
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
167-
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
168-
; CHECK-NEXT: ; %bb.1: ; %false
169-
; CHECK-NEXT: s_mov_b32 s0, 33
170-
; CHECK-NEXT: s_branch .LBB10_3
171-
; CHECK-NEXT: .LBB10_2: ; %true
168+
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
169+
; CHECK-NEXT: ; %bb.1: ; %true
172170
; CHECK-NEXT: s_mov_b32 s0, 42
173171
; CHECK-NEXT: s_branch .LBB10_3
172+
; CHECK-NEXT: .LBB10_2: ; %false
173+
; CHECK-NEXT: s_mov_b32 s0, 33
174+
; CHECK-NEXT: s_branch .LBB10_3
174175
; CHECK-NEXT: .LBB10_3:
175176
%c = trunc i32 %v to i1
176177
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -208,11 +209,7 @@ false:
208209
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
209210
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
210211
; CHECK: ; %bb.0:
211-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
212-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
213-
; CHECK-NEXT: s_and_b32 s0, 1, s0
214-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
215-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
212+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
216213
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
217214
; CHECK-NEXT: ; %bb.1: ; %true
218215
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -258,17 +255,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
258255
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
259256
; CHECK: ; %bb.0:
260257
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
261-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
262-
; CHECK-NEXT: s_and_b32 s0, 1, s0
263-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
264-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
265-
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
266-
; CHECK-NEXT: ; %bb.1: ; %false
267-
; CHECK-NEXT: s_mov_b32 s0, 33
268-
; CHECK-NEXT: s_branch .LBB14_3
269-
; CHECK-NEXT: .LBB14_2: ; %true
258+
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
259+
; CHECK-NEXT: ; %bb.1: ; %true
270260
; CHECK-NEXT: s_mov_b32 s0, 42
271261
; CHECK-NEXT: s_branch .LBB14_3
262+
; CHECK-NEXT: .LBB14_2: ; %false
263+
; CHECK-NEXT: s_mov_b32 s0, 33
264+
; CHECK-NEXT: s_branch .LBB14_3
272265
; CHECK-NEXT: .LBB14_3:
273266
%c = icmp ult i32 %v, 12
274267
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -310,14 +303,12 @@ false:
310303
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
311304
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
312305
; CHECK: ; %bb.0:
313-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
306+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
314307
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
315-
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
308+
; CHECK-NEXT: s_cmp_le_u32 s1, 34
316309
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
317-
; CHECK-NEXT: s_and_b32 s0, s0, s1
318-
; CHECK-NEXT: s_and_b32 s0, 1, s0
319-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
320-
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
310+
; CHECK-NEXT: s_or_b32 s0, s0, s1
311+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
321312
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
322313
; CHECK-NEXT: ; %bb.1: ; %true
323314
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -372,16 +363,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
372363
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
373364
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
374365
; CHECK-NEXT: s_and_b32 s0, s0, s1
375-
; CHECK-NEXT: s_and_b32 s0, 1, s0
376-
; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
377366
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
378-
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
379-
; CHECK-NEXT: ; %bb.1: ; %false
380-
; CHECK-NEXT: s_mov_b32 s0, 33
381-
; CHECK-NEXT: s_branch .LBB18_3
382-
; CHECK-NEXT: .LBB18_2: ; %true
367+
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
368+
; CHECK-NEXT: ; %bb.1: ; %true
383369
; CHECK-NEXT: s_mov_b32 s0, 42
384370
; CHECK-NEXT: s_branch .LBB18_3
371+
; CHECK-NEXT: .LBB18_2: ; %false
372+
; CHECK-NEXT: s_mov_b32 s0, 33
373+
; CHECK-NEXT: s_branch .LBB18_3
385374
; CHECK-NEXT: .LBB18_3:
386375
%v1c = icmp ult i32 %v1, 12
387376
%v2c = icmp ugt i32 %v2, 34

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll

Lines changed: 28 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ false:
116116
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
117117
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
118118
; CHECK: ; %bb.0:
119-
; CHECK-NEXT: s_and_b32 s0, 1, s0
120-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
121-
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
119+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
120+
; CHECK-NEXT: s_and_b32 s0, s0, 1
121+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
122122
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
123123
; CHECK-NEXT: ; %bb.1: ; %true
124124
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,16 +164,17 @@ false:
164164
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
165165
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
166166
; CHECK: ; %bb.0:
167-
; CHECK-NEXT: s_and_b32 s0, 1, s0
168-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
169-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
170-
; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
171-
; CHECK-NEXT: ; %bb.1: ; %false
172-
; CHECK-NEXT: s_mov_b32 s0, 33
173-
; CHECK-NEXT: s_branch .LBB10_3
174-
; CHECK-NEXT: .LBB10_2: ; %true
167+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
168+
; CHECK-NEXT: s_xor_b32 s0, s0, 1
169+
; CHECK-NEXT: s_and_b32 s0, s0, 1
170+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
171+
; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
172+
; CHECK-NEXT: ; %bb.1: ; %true
175173
; CHECK-NEXT: s_mov_b32 s0, 42
176174
; CHECK-NEXT: s_branch .LBB10_3
175+
; CHECK-NEXT: .LBB10_2: ; %false
176+
; CHECK-NEXT: s_mov_b32 s0, 33
177+
; CHECK-NEXT: s_branch .LBB10_3
177178
; CHECK-NEXT: .LBB10_3:
178179
%c = trunc i32 %v to i1
179180
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -211,11 +212,7 @@ false:
211212
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
212213
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
213214
; CHECK: ; %bb.0:
214-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
215-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
216-
; CHECK-NEXT: s_and_b32 s0, 1, s0
217-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
218-
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
215+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
219216
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
220217
; CHECK-NEXT: ; %bb.1: ; %true
221218
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
261258
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
262259
; CHECK: ; %bb.0:
263260
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
264-
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
265-
; CHECK-NEXT: s_and_b32 s0, 1, s0
266-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
267-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
268-
; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
269-
; CHECK-NEXT: ; %bb.1: ; %false
270-
; CHECK-NEXT: s_mov_b32 s0, 33
271-
; CHECK-NEXT: s_branch .LBB14_3
272-
; CHECK-NEXT: .LBB14_2: ; %true
261+
; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
262+
; CHECK-NEXT: ; %bb.1: ; %true
273263
; CHECK-NEXT: s_mov_b32 s0, 42
274264
; CHECK-NEXT: s_branch .LBB14_3
265+
; CHECK-NEXT: .LBB14_2: ; %false
266+
; CHECK-NEXT: s_mov_b32 s0, 33
267+
; CHECK-NEXT: s_branch .LBB14_3
275268
; CHECK-NEXT: .LBB14_3:
276269
%c = icmp ult i32 %v, 12
277270
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -313,14 +306,12 @@ false:
313306
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
314307
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
315308
; CHECK: ; %bb.0:
316-
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
309+
; CHECK-NEXT: s_cmp_ge_u32 s0, 12
317310
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
318-
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
311+
; CHECK-NEXT: s_cmp_le_u32 s1, 34
319312
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
320-
; CHECK-NEXT: s_and_b32 s0, s0, s1
321-
; CHECK-NEXT: s_and_b32 s0, 1, s0
322-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
323-
; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
313+
; CHECK-NEXT: s_or_b32 s0, s0, s1
314+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
324315
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
325316
; CHECK-NEXT: ; %bb.1: ; %true
326317
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
375366
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
376367
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
377368
; CHECK-NEXT: s_and_b32 s0, s0, s1
378-
; CHECK-NEXT: s_and_b32 s0, 1, s0
379-
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
380-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
381-
; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
382-
; CHECK-NEXT: ; %bb.1: ; %false
383-
; CHECK-NEXT: s_mov_b32 s0, 33
384-
; CHECK-NEXT: s_branch .LBB18_3
385-
; CHECK-NEXT: .LBB18_2: ; %true
369+
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
370+
; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
371+
; CHECK-NEXT: ; %bb.1: ; %true
386372
; CHECK-NEXT: s_mov_b32 s0, 42
387373
; CHECK-NEXT: s_branch .LBB18_3
374+
; CHECK-NEXT: .LBB18_2: ; %false
375+
; CHECK-NEXT: s_mov_b32 s0, 33
376+
; CHECK-NEXT: s_branch .LBB18_3
388377
; CHECK-NEXT: .LBB18_3:
389378
%v1c = icmp ult i32 %v1, 12
390379
%v2c = icmp ugt i32 %v2, 34

0 commit comments

Comments
 (0)