11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2- ; RUN: opt < %s -passes=gvn -S | FileCheck %s
2+ ; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S | FileCheck %s
33;
4- ; Tests for assume-based ballot optimizations
5- ; This optimization recognizes patterns like:
6- ; assume(ballot(cmp) == -1) -> cmp is true on all lanes
7- ; assume(ballot(cmp) == 0) -> cmp is false on all lanes
4+ ; Tests for assume-based ballot optimizations for patterns like:
5+ ; assume(ballot(cmp) == -1) -> replace uses of cmp with true
6+ ; assume(ballot(cmp) == 0) -> replace uses of cmp with false
7+ ; assume(ballot(cmp) == ballot(1)) -> replace uses of cmp with true
88
99declare void @llvm.assume (i1 )
1010declare i64 @llvm.amdgcn.ballot.i64 (i1 )
@@ -26,7 +26,6 @@ define amdgpu_kernel void @assume_ballot_all_lanes_i64(i32 %x, ptr addrspace(1)
2626; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT:%.*]], align 4
2727; CHECK-NEXT: ret void
2828; CHECK: bar:
29- ; CHECK-NEXT: store i32 0, ptr addrspace(1) [[OUT]], align 4
3029; CHECK-NEXT: ret void
3130;
3231 %cmp = icmp eq i32 %x , 0
@@ -70,7 +69,7 @@ define amdgpu_kernel void @assume_ballot_exec_mask_ballot_true(i32 %x, ptr addrs
7069; CHECK-NEXT: [[EXEC:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
7170; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], [[EXEC]]
7271; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
73- ; CHECK-NEXT: br i1 [[CMP]] , label [[FOO:%.*]], label [[BAR:%.*]]
72+ ; CHECK-NEXT: br i1 true , label [[FOO:%.*]], label [[BAR:%.*]]
7473; CHECK: foo:
7574; CHECK-NEXT: ret void
7675; CHECK: bar:
@@ -147,7 +146,7 @@ define amdgpu_kernel void @assume_ballot_exec_mask_wave32(i32 %x, ptr addrspace(
147146; CHECK-NEXT: [[EXEC:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
148147; CHECK-NEXT: [[ALL:%.*]] = icmp eq i32 [[BALLOT]], [[EXEC]]
149148; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
150- ; CHECK-NEXT: br i1 [[CMP]] , label [[FOO:%.*]], label [[BAR:%.*]]
149+ ; CHECK-NEXT: br i1 true , label [[FOO:%.*]], label [[BAR:%.*]]
151150; CHECK: foo:
152151; CHECK-NEXT: ret void
153152; CHECK: bar:
@@ -175,7 +174,7 @@ define amdgpu_kernel void @assume_ballot_dominance(i32 %x, ptr addrspace(1) %out
175174; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
176175; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
177176; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
178- ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i32 , ptr addrspace(1) [[OUT]], i64 1
177+ ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i8 , ptr addrspace(1) [[OUT]], i64 4
179178; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT2]], align 4
180179; CHECK-NEXT: ret void
181180;
@@ -196,7 +195,7 @@ define amdgpu_kernel void @assume_ballot_swapped(i32 %x, ptr addrspace(1) %out)
196195; CHECK-LABEL: @assume_ballot_swapped(
197196; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
198197; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
199- ; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 -1, [[BALLOT]]
198+ ; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
200199; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
201200; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
202201; CHECK: foo:
@@ -224,7 +223,7 @@ define amdgpu_kernel void @assume_ballot_exec_mask_swapped(i32 %x, ptr addrspace
224223; CHECK-NEXT: [[EXEC:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
225224; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[EXEC]], [[BALLOT]]
226225; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
227- ; CHECK-NEXT: br i1 [[CMP]] , label [[FOO:%.*]], label [[BAR:%.*]]
226+ ; CHECK-NEXT: br i1 true , label [[FOO:%.*]], label [[BAR:%.*]]
228227; CHECK: foo:
229228; CHECK-NEXT: ret void
230229; CHECK: bar:
@@ -251,7 +250,7 @@ define amdgpu_kernel void @assume_ballot_multiple_uses(i32 %x, ptr addrspace(1)
251250; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
252251; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
253252; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT:%.*]], align 4
254- ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i32 , ptr addrspace(1) [[OUT]], i64 1
253+ ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i8 , ptr addrspace(1) [[OUT]], i64 4
255254; CHECK-NEXT: store i32 10, ptr addrspace(1) [[OUT2]], align 4
256255; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
257256; CHECK: foo:
@@ -284,12 +283,10 @@ define amdgpu_kernel void @assume_ballot_exec_mask_multiple_uses(i32 %x, ptr add
284283; CHECK-NEXT: [[EXEC:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
285284; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], [[EXEC]]
286285; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
287- ; CHECK-NEXT: [[USE1:%.*]] = zext i1 [[CMP]] to i32
288- ; CHECK-NEXT: store i32 [[USE1]], ptr addrspace(1) [[OUT:%.*]], align 4
289- ; CHECK-NEXT: [[USE2:%.*]] = select i1 [[CMP]], i32 10, i32 20
290- ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 1
291- ; CHECK-NEXT: store i32 [[USE2]], ptr addrspace(1) [[OUT2]], align 4
292- ; CHECK-NEXT: br i1 [[CMP]], label [[FOO:%.*]], label [[BAR:%.*]]
286+ ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT:%.*]], align 4
287+ ; CHECK-NEXT: [[OUT2:%.*]] = getelementptr i8, ptr addrspace(1) [[OUT]], i64 4
288+ ; CHECK-NEXT: store i32 10, ptr addrspace(1) [[OUT2]], align 4
289+ ; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
293290; CHECK: foo:
294291; CHECK-NEXT: ret void
295292; CHECK: bar:
@@ -313,27 +310,24 @@ bar:
313310 ret void
314311}
315312
316- ; ============================================================================
317- ; NEGATIVE CASES
318- ; ============================================================================
319-
320- ; Test 1: assume(ballot != -1) -> cmp should not be transformed (cmp is false in at least one lane)
321- define amdgpu_kernel void @assume_ballot_ne_negative (i32 %x , ptr addrspace (1 ) %out ) {
322- ; CHECK-LABEL: @assume_ballot_ne_negative(
313+ ; Test 12: ballot(cmp) == ballot(false) -> cmp replaced with false
314+ define amdgpu_kernel void @assume_ballot_false (i32 %x , ptr addrspace (1 ) %out ) {
315+ ; CHECK-LABEL: @assume_ballot_false(
323316; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
324317; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
325- ; CHECK-NEXT: [[NOT_ALL :%.*]] = icmp ne i64 [[BALLOT]], -1
326- ; CHECK-NEXT: call void @llvm.assume(i1 [[NOT_ALL ]])
327- ; CHECK-NEXT: br i1 [[CMP]] , label [[FOO:%.*]], label [[BAR:%.*]]
318+ ; CHECK-NEXT: [[MATCHES :%.*]] = icmp eq i64 [[BALLOT]], 0
319+ ; CHECK-NEXT: call void @llvm.assume(i1 [[MATCHES ]])
320+ ; CHECK-NEXT: br i1 false , label [[FOO:%.*]], label [[BAR:%.*]]
328321; CHECK: foo:
329322; CHECK-NEXT: ret void
330323; CHECK: bar:
331324; CHECK-NEXT: ret void
332325;
333326 %cmp = icmp eq i32 %x , 0
334327 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %cmp )
335- %not_all = icmp ne i64 %ballot , -1
336- call void @llvm.assume (i1 %not_all )
328+ %not_exec = call i64 @llvm.amdgcn.ballot.i64 (i1 false )
329+ %matches = icmp eq i64 %ballot , %not_exec
330+ call void @llvm.assume (i1 %matches )
337331 br i1 %cmp , label %foo , label %bar
338332
339333foo:
@@ -342,13 +336,17 @@ bar:
342336 ret void
343337}
344338
345- ; Test 2: assume(ballot != 0) -> cmp should not be transformed (cmp is true in at least one lane)
346- define amdgpu_kernel void @assume_ballot_ne_zero_negative (i32 %x , ptr addrspace (1 ) %out ) {
347- ; CHECK-LABEL: @assume_ballot_ne_zero_negative(
339+ ; ============================================================================
340+ ; NEGATIVE CASES
341+ ; ============================================================================
342+
343+ ; Test 1: assume(ballot != -1) -> no transformation (requires icmp eq)
344+ define amdgpu_kernel void @assume_ballot_ne_negative (i32 %x , ptr addrspace (1 ) %out ) {
345+ ; CHECK-LABEL: @assume_ballot_ne_negative(
348346; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
349347; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
350- ; CHECK-NEXT: [[SOME :%.*]] = icmp ne i64 [[BALLOT]], 0
351- ; CHECK-NEXT: call void @llvm.assume(i1 [[SOME ]])
348+ ; CHECK-NEXT: [[NOT_ALL :%.*]] = icmp ne i64 [[BALLOT]], -1
349+ ; CHECK-NEXT: call void @llvm.assume(i1 [[NOT_ALL ]])
352350; CHECK-NEXT: br i1 [[CMP]], label [[FOO:%.*]], label [[BAR:%.*]]
353351; CHECK: foo:
354352; CHECK-NEXT: ret void
@@ -357,8 +355,8 @@ define amdgpu_kernel void @assume_ballot_ne_zero_negative(i32 %x, ptr addrspace(
357355;
358356 %cmp = icmp eq i32 %x , 0
359357 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %cmp )
360- %some = icmp ne i64 %ballot , 0
361- call void @llvm.assume (i1 %some )
358+ %not_all = icmp ne i64 %ballot , - 1
359+ call void @llvm.assume (i1 %not_all )
362360 br i1 %cmp , label %foo , label %bar
363361
364362foo:
@@ -367,14 +365,13 @@ bar:
367365 ret void
368366}
369367
370- ; Test 3: ballot(cmp) == ballot(false ) -> cmp should not be transformed (RHS is not EXEC MASK )
371- define amdgpu_kernel void @assume_ballot_not_exec_mask (i32 %x , ptr addrspace (1 ) %out ) {
372- ; CHECK-LABEL: @assume_ballot_not_exec_mask (
368+ ; Test 2: assume(ballot != 0) -> no transformation (requires icmp eq)
369+ define amdgpu_kernel void @assume_ballot_ne_zero_negative (i32 %x , ptr addrspace (1 ) %out ) {
370+ ; CHECK-LABEL: @assume_ballot_ne_zero_negative (
373371; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
374372; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[CMP]])
375- ; CHECK-NEXT: [[NOT_EXEC:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 false)
376- ; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i64 [[BALLOT]], [[NOT_EXEC]]
377- ; CHECK-NEXT: call void @llvm.assume(i1 [[MATCHES]])
373+ ; CHECK-NEXT: [[SOME:%.*]] = icmp ne i64 [[BALLOT]], 0
374+ ; CHECK-NEXT: call void @llvm.assume(i1 [[SOME]])
378375; CHECK-NEXT: br i1 [[CMP]], label [[FOO:%.*]], label [[BAR:%.*]]
379376; CHECK: foo:
380377; CHECK-NEXT: ret void
@@ -383,9 +380,8 @@ define amdgpu_kernel void @assume_ballot_not_exec_mask(i32 %x, ptr addrspace(1)
383380;
384381 %cmp = icmp eq i32 %x , 0
385382 %ballot = call i64 @llvm.amdgcn.ballot.i64 (i1 %cmp )
386- %not_exec = call i64 @llvm.amdgcn.ballot.i64 (i1 false )
387- %matches = icmp eq i64 %ballot , %not_exec
388- call void @llvm.assume (i1 %matches )
383+ %some = icmp ne i64 %ballot , 0
384+ call void @llvm.assume (i1 %some )
389385 br i1 %cmp , label %foo , label %bar
390386
391387foo:
394390 ret void
395391}
396392
397- ; Test 4 : Constant as mask value (other than -1 or 0) -> cmp should not be transformed
393+ ; Test 3: Constant mask (other than -1/0) -> no transformation
398394define amdgpu_kernel void @assume_ballot_constant_mask (i32 %x , ptr addrspace (1 ) %out ) {
399395; CHECK-LABEL: @assume_ballot_constant_mask(
400396; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
419415 ret void
420416}
421417
422- ; Test 5: Arbitrary mask -> cmp should not be transformed
418+ ; Test 4: Runtime mask value -> no transformation
423419define amdgpu_kernel void @assume_ballot_arbitrary_mask (i32 %x , i64 %mask , ptr addrspace (1 ) %out ) {
424420; CHECK-LABEL: @assume_ballot_arbitrary_mask(
425421; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
0 commit comments