11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
3- ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=DCE-CHECK
3+ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,early-cse,instcombine,simplifycfg -S < %s | FileCheck %s -check-prefix=DCE-CHECK
44
55define protected amdgpu_kernel void @trivial_waterfall (ptr addrspace (1 ) %out ) {
66; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall(
77; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
88; PASS-CHECK-NEXT: [[ENTRY:.*]]:
99; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
1010; PASS-CHECK: [[WHILE]]:
11- ; PASS-CHECK-NEXT: [[DONE1:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
12- ; PASS-CHECK-NEXT: [[DONE:%.*]] = xor i1 [[DONE1]], true
13- ; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
14- ; PASS-CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[DONE]] to i64
15- ; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[TMP0]], 0
16- ; PASS-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF]]
11+ ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
12+ ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
13+ ; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
14+ ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
15+ ; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
16+ ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
1717; PASS-CHECK: [[IF]]:
1818; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
1919; PASS-CHECK-NEXT: br label %[[WHILE]]
@@ -49,18 +49,18 @@ define protected amdgpu_kernel void @waterfall(ptr addrspace(1) %out) {
4949; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
5050; PASS-CHECK-NEXT: [[ENTRY:.*]]:
5151; PASS-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
52- ; PASS-CHECK-NEXT: [[TMP1:%.*]] = tail call noundef i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP0]])
52+ ; PASS-CHECK-NEXT: [[TID:%.*]] = tail call noundef i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP0]])
5353; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
5454; PASS-CHECK: [[WHILE]]:
55- ; PASS-CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ]
56- ; PASS-CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true
57- ; PASS-CHECK-NEXT: [[TMP8:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP4]])
58- ; PASS-CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 0
59- ; PASS-CHECK-NEXT: br i1 [[TMP9]], label %[[EXIT:.*]], label %[[IF:.*]]
55+ ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ]
56+ ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
57+ ; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
58+ ; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
59+ ; PASS-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF:.*]]
6060; PASS-CHECK: [[IF]]:
61- ; PASS-CHECK-NEXT: [[TMP12:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP1]])
62- ; PASS-CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP1]], [[TMP12]]
63- ; PASS-CHECK-NEXT: br i1 [[TMP13]], label %[[WORK:.*]], label %[[TAIL]]
61+ ; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID]])
62+ ; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[TID]], [[FIRST_ACTIVE_ID]]
63+ ; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
6464; PASS-CHECK: [[WORK]]:
6565; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
6666; PASS-CHECK-NEXT: br label %[[TAIL]]
@@ -76,15 +76,15 @@ define protected amdgpu_kernel void @waterfall(ptr addrspace(1) %out) {
7676; DCE-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
7777; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
7878; DCE-CHECK: [[WHILE]]:
79- ; DCE-CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[TMP12:%.*]], %[[TAIL:.*]] ]
80- ; DCE-CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true
81- ; DCE-CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP3]])
82- ; DCE-CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
83- ; DCE-CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT:.*]], label %[[IF:.*]]
79+ ; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[IS_FIRST_ACTIVE_ID:%.*]], %[[TAIL:.*]] ]
80+ ; DCE-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
81+ ; DCE-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]])
82+ ; DCE-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP1]], 0
83+ ; DCE-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF:.*]]
8484; DCE-CHECK: [[IF]]:
85- ; DCE-CHECK-NEXT: [[TMP11:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP0]])
86- ; DCE-CHECK-NEXT: [[TMP12]] = icmp eq i32 [[TMP0]], [[TMP11]]
87- ; DCE-CHECK-NEXT: br i1 [[TMP12]], label %[[WORK:.*]], label %[[TAIL]]
85+ ; DCE-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP0]])
86+ ; DCE-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID]] = icmp eq i32 [[TMP0]], [[FIRST_ACTIVE_ID]]
87+ ; DCE-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
8888; DCE-CHECK: [[WORK]]:
8989; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
9090; DCE-CHECK-NEXT: br label %[[TAIL]]
@@ -122,29 +122,25 @@ exit:
122122 ret void
123123}
124124
125- define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp (ptr addrspace (1 ) %out ) {
126- ; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp(
125+ define protected amdgpu_kernel void @trivial_waterfall_swap_op (ptr addrspace (1 ) %out ) {
126+ ; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_swap_op(
127127; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
128128; PASS-CHECK-NEXT: [[ENTRY:.*]]:
129129; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
130130; PASS-CHECK: [[WHILE]]:
131131; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
132132; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
133133; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
134- ; PASS-CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[NOT_DONE]] to i64
135- ; PASS-CHECK-NEXT: [[IS_DONE_1:%.*]] = icmp eq i64 [[TMP1]], 0
136- ; PASS-CHECK-NEXT: [[TMP0:%.*]] = zext i1 [[NOT_DONE]] to i64
137- ; PASS-CHECK-NEXT: [[IS_DONE_3:%.*]] = icmp eq i64 [[TMP0]], 0
138- ; PASS-CHECK-NEXT: br i1 [[IS_DONE_1]], label %[[EXIT:.*]], label %[[IF]]
134+ ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
135+ ; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 0, [[BALLOT]]
136+ ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
139137; PASS-CHECK: [[IF]]:
140138; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
141- ; PASS-CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[NOT_DONE]] to i64
142- ; PASS-CHECK-NEXT: [[IS_DONE_4:%.*]] = icmp eq i64 [[TMP2]], 0
143139; PASS-CHECK-NEXT: br label %[[WHILE]]
144140; PASS-CHECK: [[EXIT]]:
145141; PASS-CHECK-NEXT: ret void
146142;
147- ; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_multiple_icmp(
143+ ; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_swap_op(
148144; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
149145; DCE-CHECK-NEXT: [[ENTRY:.*:]]
150146; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
@@ -157,13 +153,11 @@ while:
157153 %done = phi i1 [ 0 , %entry ], [ 1 , %if ]
158154 %not_done = xor i1 %done , true
159155 %ballot = tail call i64 @llvm.amdgcn.ballot.i64 (i1 %not_done )
160- %is_done_1 = icmp eq i64 %ballot , 0
161- %is_done_2 = icmp eq i64 %ballot , 0
162- br i1 %is_done_1 , label %exit , label %if
156+ %is_done = icmp eq i64 0 , %ballot
157+ br i1 %is_done , label %exit , label %if
163158
164159if:
165160 store i32 5 , ptr addrspace (1 ) %out
166- %is_done_3 = icmp eq i64 %ballot , 0
167161 br label %while
168162
169163exit:
0 commit comments