Skip to content

Commit e965482

Browse files
update description, add O3 run line in test
1 parent 7302227 commit e965482

File tree

2 files changed

+52
-107
lines changed

2 files changed

+52
-107
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
/// uniformity must be convergent (and isel will introduce v_readfirstlane for
1818
/// them if their operands can't be proven statically uniform).
1919
///
20-
/// Although the transformations are applied at the function level, this pass is
21-
/// structured as a ModulePass because we must also inspect intrinsic
22-
/// declarations at the module scope. A function pass would require re-scanning
23-
/// all instructions in every function, while the module view lets us directly
24-
/// pair intrinsic uses with their declarations in a single traversal.
20+
/// This pass is implemented as a ModulePass because intrinsic declarations
21+
/// exist at the module scope, allowing us to skip processing entirely if no
22+
/// declarations are present and to traverse their user lists directly when
23+
/// they are. A FunctionPass would instead require scanning every instruction
24+
/// in every function to find relevant intrinsics, which is far less efficient.
2525
//===----------------------------------------------------------------------===//
2626

2727
#include "AMDGPU.h"
@@ -47,7 +47,8 @@ using namespace llvm;
4747
using namespace llvm::AMDGPU;
4848
using namespace llvm::PatternMatch;
4949

50-
/// Wrapper for querying uniformity info that first checks new instructions.
50+
/// Wrapper for querying uniformity info that first checks locally tracked
51+
/// instructions.
5152
static bool
5253
isDivergentUseWithNew(const Use &U, const UniformityInfo &UI,
5354
const ValueMap<const Value *, bool> &Tracker) {
@@ -57,7 +58,7 @@ isDivergentUseWithNew(const Use &U, const UniformityInfo &UI,
5758
return UI.isDivergentUse(U);
5859
}
5960

60-
/// Optimizes uniform intrinsics.
61+
/// Optimizes uniform intrinsic calls if their operands can be proven uniform.
6162
static bool optimizeUniformIntrinsic(IntrinsicInst &II,
6263
const UniformityInfo &UI,
6364
ValueMap<const Value *, bool> &Tracker) {
@@ -119,7 +120,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
119120
return false;
120121
}
121122

122-
/// Iterate over intrinsics in the module to optimise.
123+
/// Iterates over intrinsic declarations in the module to optimize their uses.
123124
static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
124125
bool IsChanged = false;
125126
ValueMap<const Value *, bool> Tracker;

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll

Lines changed: 43 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK
33
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
4-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,early-cse,instcombine,simplifycfg -S < %s | FileCheck %s -check-prefix=DCE-CHECK
4+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK
55

66
define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) {
77
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
@@ -31,18 +31,11 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1)
3131
; PASS-CHECK: [[EXIT]]:
3232
; PASS-CHECK-NEXT: ret void
3333
;
34-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
35-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
36-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
37-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
38-
; DCE-CHECK: [[WHILE]]:
39-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
40-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
41-
; DCE-CHECK: [[IF]]:
42-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
43-
; DCE-CHECK-NEXT: br label %[[WHILE]]
44-
; DCE-CHECK: [[EXIT]]:
45-
; DCE-CHECK-NEXT: ret void
34+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
35+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
36+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
37+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
38+
; O3-CHECK-NEXT: ret void
4639
;
4740
entry:
4841
br label %while
@@ -90,18 +83,11 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrs
9083
; PASS-CHECK: [[EXIT]]:
9184
; PASS-CHECK-NEXT: ret void
9285
;
93-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
94-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
95-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
96-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
97-
; DCE-CHECK: [[WHILE]]:
98-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
99-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
100-
; DCE-CHECK: [[IF]]:
101-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
102-
; DCE-CHECK-NEXT: br label %[[WHILE]]
103-
; DCE-CHECK: [[EXIT]]:
104-
; DCE-CHECK-NEXT: ret void
86+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
87+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
88+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
89+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
90+
; O3-CHECK-NEXT: ret void
10591
;
10692
entry:
10793
br label %while
@@ -147,18 +133,11 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1)
147133
; PASS-CHECK: [[EXIT]]:
148134
; PASS-CHECK-NEXT: ret void
149135
;
150-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
151-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
152-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
153-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
154-
; DCE-CHECK: [[WHILE]]:
155-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
156-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
157-
; DCE-CHECK: [[IF]]:
158-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
159-
; DCE-CHECK-NEXT: br label %[[WHILE]]
160-
; DCE-CHECK: [[EXIT]]:
161-
; DCE-CHECK-NEXT: ret void
136+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
137+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
138+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
139+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
140+
; O3-CHECK-NEXT: ret void
162141
;
163142
entry:
164143
br label %while
@@ -203,18 +182,11 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspac
203182
; PASS-CHECK: [[EXIT]]:
204183
; PASS-CHECK-NEXT: ret void
205184
;
206-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
207-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
208-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
209-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
210-
; DCE-CHECK: [[WHILE]]:
211-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
212-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
213-
; DCE-CHECK: [[IF]]:
214-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
215-
; DCE-CHECK-NEXT: br label %[[WHILE]]
216-
; DCE-CHECK: [[EXIT]]:
217-
; DCE-CHECK-NEXT: ret void
185+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
186+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
187+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
188+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
189+
; O3-CHECK-NEXT: ret void
218190
;
219191
entry:
220192
br label %while
@@ -267,18 +239,11 @@ define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1)
267239
; PASS-CHECK: [[EXIT]]:
268240
; PASS-CHECK-NEXT: ret void
269241
;
270-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall(
271-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
272-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
273-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
274-
; DCE-CHECK: [[WHILE]]:
275-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[WORK:.*]] ]
276-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[WORK]]
277-
; DCE-CHECK: [[WORK]]:
278-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
279-
; DCE-CHECK-NEXT: br label %[[WHILE]]
280-
; DCE-CHECK: [[EXIT]]:
281-
; DCE-CHECK-NEXT: ret void
242+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall(
243+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
244+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
245+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
246+
; O3-CHECK-NEXT: ret void
282247
;
283248
entry:
284249
br label %while
@@ -312,8 +277,8 @@ define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i3
312277
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
313278
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
314279
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
315-
; CURRENT-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP0]], 0
316-
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]]
280+
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
281+
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]]
317282
; CURRENT-CHECK: [[WORK_PEEL]]:
318283
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
319284
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
@@ -341,18 +306,11 @@ define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i3
341306
; PASS-CHECK: [[EXIT]]:
342307
; PASS-CHECK-NEXT: ret void
343308
;
344-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall(
345-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[MYMASK:%.*]]) #[[ATTR0]] {
346-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
347-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
348-
; DCE-CHECK: [[WHILE]]:
349-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[WORK:.*]] ]
350-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[WORK]]
351-
; DCE-CHECK: [[WORK]]:
352-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
353-
; DCE-CHECK-NEXT: br label %[[WHILE]]
354-
; DCE-CHECK: [[EXIT]]:
355-
; DCE-CHECK-NEXT: ret void
309+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall(
310+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
311+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
312+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
313+
; O3-CHECK-NEXT: ret void
356314
;
357315
entry:
358316
br label %while
@@ -409,18 +367,11 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(ptr addrspace
409367
; PASS-CHECK: [[EXIT]]:
410368
; PASS-CHECK-NEXT: ret void
411369
;
412-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(
413-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
414-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
415-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
416-
; DCE-CHECK: [[WHILE]]:
417-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
418-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
419-
; DCE-CHECK: [[IF]]:
420-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
421-
; DCE-CHECK-NEXT: br label %[[WHILE]]
422-
; DCE-CHECK: [[EXIT]]:
423-
; DCE-CHECK-NEXT: ret void
370+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(
371+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
372+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
373+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
374+
; O3-CHECK-NEXT: ret void
424375
;
425376
entry:
426377
br label %while
@@ -466,18 +417,11 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(ptr addrspace
466417
; PASS-CHECK: [[EXIT]]:
467418
; PASS-CHECK-NEXT: ret void
468419
;
469-
; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(
470-
; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
471-
; DCE-CHECK-NEXT: [[ENTRY:.*]]:
472-
; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
473-
; DCE-CHECK: [[WHILE]]:
474-
; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
475-
; DCE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
476-
; DCE-CHECK: [[IF]]:
477-
; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
478-
; DCE-CHECK-NEXT: br label %[[WHILE]]
479-
; DCE-CHECK: [[EXIT]]:
480-
; DCE-CHECK-NEXT: ret void
420+
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(
421+
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
422+
; O3-CHECK-NEXT: [[ENTRY:.*:]]
423+
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
424+
; O3-CHECK-NEXT: ret void
481425
;
482426
entry:
483427
br label %while

0 commit comments

Comments
 (0)