Skip to content

Commit b19b765

Browse files
Skip only constant args
1 parent f4f1277 commit b19b765

File tree

3 files changed: 62 additions and 14 deletions.

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,14 +1349,16 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
13491349
// assume(ballot(x) == -1) -> x = true
13501350
// assume(ballot(x) == 0) -> x = false
13511351
//
1352-
// Skip if Arg is not an instruction (e.g., constant, argument).
1353-
if (!isa<Instruction>(Arg))
1352+
// Skip if Arg is a constant.
1353+
if (isa<Constant>(Arg))
13541354
break;
13551355

13561356
// Skip if ballot width doesn't match wave size.
13571357
if (ST->getWavefrontSize() != II.getType()->getIntegerBitWidth())
13581358
break;
13591359

1360+
// For each llvm.assume that references the ballot intrinsic, try to infer
1361+
// the value of the ballot's condition argument from the assumed relation.
13601362
for (auto &AssumeVH : IC.getAssumptionCache().assumptionsFor(&II)) {
13611363
if (!AssumeVH)
13621364
continue;

llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.ballot-assume-wave32.ll

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -274,11 +274,34 @@ bar:
274274
ret void
275275
}
276276

277+
; Test 10: Function argument can be optimized -> arg replaced with true
278+
define amdgpu_kernel void @wave32_ballot_i32_argument(i1 %arg) {
279+
; CHECK-LABEL: @wave32_ballot_i32_argument(
280+
; CHECK-NEXT: [[BALLOT:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[ARG:%.*]])
281+
; CHECK-NEXT: [[ALL:%.*]] = icmp eq i32 [[BALLOT]], -1
282+
; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
283+
; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
284+
; CHECK: foo:
285+
; CHECK-NEXT: ret void
286+
; CHECK: bar:
287+
; CHECK-NEXT: ret void
288+
;
289+
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %arg)
290+
%all = icmp eq i32 %ballot, -1
291+
call void @llvm.assume(i1 %all)
292+
br i1 %arg, label %foo, label %bar
293+
294+
foo:
295+
ret void
296+
bar:
297+
ret void
298+
}
299+
277300
; ============================================================================
278301
; NEGATIVE CASES: ballot.i32 on wave32
279302
; ============================================================================
280303

281-
; Test 10: assume(ballot != -1) -> no transformation (requires icmp eq)
304+
; Test 11: assume(ballot != -1) -> no transformation (requires icmp eq)
282305
define amdgpu_kernel void @wave32_ballot_i32_ne_negative(i32 %x, ptr addrspace(1) %out) {
283306
; CHECK-LABEL: @wave32_ballot_i32_ne_negative(
284307
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -303,7 +326,7 @@ bar:
303326
ret void
304327
}
305328

306-
; Test 11: assume(ballot != 0) -> no transformation (requires icmp eq)
329+
; Test 12: assume(ballot != 0) -> no transformation (requires icmp eq)
307330
define amdgpu_kernel void @wave32_ballot_i32_ne_zero_negative(i32 %x, ptr addrspace(1) %out) {
308331
; CHECK-LABEL: @wave32_ballot_i32_ne_zero_negative(
309332
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -328,7 +351,7 @@ bar:
328351
ret void
329352
}
330353

331-
; Test 12: Constant mask (other than -1/0) -> no transformation
354+
; Test 13: Constant mask (other than -1/0) -> no transformation
332355
define amdgpu_kernel void @wave32_ballot_i32_constant_mask(i32 %x, ptr addrspace(1) %out) {
333356
; CHECK-LABEL: @wave32_ballot_i32_constant_mask(
334357
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -353,7 +376,7 @@ bar:
353376
ret void
354377
}
355378

356-
; Test 13: Runtime mask value -> no transformation
379+
; Test 14: Runtime mask value -> no transformation
357380
define amdgpu_kernel void @wave32_ballot_i32_arbitrary_mask(i32 %x, i32 %mask, ptr addrspace(1) %out) {
358381
; CHECK-LABEL: @wave32_ballot_i32_arbitrary_mask(
359382
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -382,7 +405,7 @@ bar:
382405
; POSITIVE CASES: ballot.i64 on wave32 (transformed to ballot.i32+zext)
383406
; ============================================================================
384407

385-
; Test 14: assume(ballot.i64(cmp) == -1) on wave32 -> cmp replaced with true
408+
; Test 15: assume(ballot.i64(cmp) == -1) on wave32 -> cmp replaced with true
386409
define amdgpu_kernel void @wave32_ballot_i64_all_lanes(i32 %x, ptr addrspace(1) %out) {
387410
; CHECK-LABEL: @wave32_ballot_i64_all_lanes(
388411
; CHECK-NEXT: store i1 true, ptr poison, align 1
@@ -404,7 +427,7 @@ bar:
404427
ret void
405428
}
406429

407-
; Test 15: assume(ballot.i64(cmp) == 0) on wave32 -> cmp replaced with false
430+
; Test 16: assume(ballot.i64(cmp) == 0) on wave32 -> cmp replaced with false
408431
define amdgpu_kernel void @wave32_ballot_i64_no_lanes(i32 %x, ptr addrspace(1) %out) {
409432
; CHECK-LABEL: @wave32_ballot_i64_no_lanes(
410433
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0

llvm/test/Transforms/InstCombine/AMDGPU/llvm.amdgcn.ballot-assume-wave64.ll

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -274,11 +274,34 @@ bar:
274274
ret void
275275
}
276276

277+
; Test 10: Function argument can be optimized -> arg replaced with true
278+
define amdgpu_kernel void @wave64_ballot_i64_argument(i1 %arg) {
279+
; CHECK-LABEL: @wave64_ballot_i64_argument(
280+
; CHECK-NEXT: [[BALLOT:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[ARG:%.*]])
281+
; CHECK-NEXT: [[ALL:%.*]] = icmp eq i64 [[BALLOT]], -1
282+
; CHECK-NEXT: call void @llvm.assume(i1 [[ALL]])
283+
; CHECK-NEXT: br i1 true, label [[FOO:%.*]], label [[BAR:%.*]]
284+
; CHECK: foo:
285+
; CHECK-NEXT: ret void
286+
; CHECK: bar:
287+
; CHECK-NEXT: ret void
288+
;
289+
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %arg)
290+
%all = icmp eq i64 %ballot, -1
291+
call void @llvm.assume(i1 %all)
292+
br i1 %arg, label %foo, label %bar
293+
294+
foo:
295+
ret void
296+
bar:
297+
ret void
298+
}
299+
277300
; ============================================================================
278301
; NEGATIVE CASES: ballot.i64 on wave64
279302
; ============================================================================
280303

281-
; Test 10: assume(ballot != -1) -> no transformation (requires icmp eq)
304+
; Test 11: assume(ballot != -1) -> no transformation (requires icmp eq)
282305
define amdgpu_kernel void @wave64_ballot_i64_ne_negative(i32 %x, ptr addrspace(1) %out) {
283306
; CHECK-LABEL: @wave64_ballot_i64_ne_negative(
284307
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -303,7 +326,7 @@ bar:
303326
ret void
304327
}
305328

306-
; Test 11: assume(ballot != 0) -> no transformation (requires icmp eq)
329+
; Test 12: assume(ballot != 0) -> no transformation (requires icmp eq)
307330
define amdgpu_kernel void @wave64_ballot_i64_ne_zero_negative(i32 %x, ptr addrspace(1) %out) {
308331
; CHECK-LABEL: @wave64_ballot_i64_ne_zero_negative(
309332
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -328,7 +351,7 @@ bar:
328351
ret void
329352
}
330353

331-
; Test 12: Constant mask (other than -1/0) -> no transformation
354+
; Test 13: Constant mask (other than -1/0) -> no transformation
332355
define amdgpu_kernel void @wave64_ballot_i64_constant_mask(i32 %x, ptr addrspace(1) %out) {
333356
; CHECK-LABEL: @wave64_ballot_i64_constant_mask(
334357
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -353,7 +376,7 @@ bar:
353376
ret void
354377
}
355378

356-
; Test 13: Runtime mask value -> no transformation
379+
; Test 14: Runtime mask value -> no transformation
357380
define amdgpu_kernel void @wave64_ballot_i64_arbitrary_mask(i32 %x, i64 %mask, ptr addrspace(1) %out) {
358381
; CHECK-LABEL: @wave64_ballot_i64_arbitrary_mask(
359382
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -382,7 +405,7 @@ bar:
382405
; NEGATIVE CASES: ballot.i32 on wave64
383406
; ============================================================================
384407

385-
; Test 14: assume(ballot.i32 == -1) on wave64 -> no transformation
408+
; Test 15: assume(ballot.i32 == -1) on wave64 -> no transformation
386409
define amdgpu_kernel void @wave64_ballot_i32_negative(i32 %x, ptr addrspace(1) %out) {
387410
; CHECK-LABEL: @wave64_ballot_i32_negative(
388411
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
@@ -407,7 +430,7 @@ bar:
407430
ret void
408431
}
409432

410-
; Test 15: assume(ballot.i32 == 0) on wave64 -> no transformation
433+
; Test 16: assume(ballot.i32 == 0) on wave64 -> no transformation
411434
define amdgpu_kernel void @wave64_ballot_i32_zero_negative(i32 %x, ptr addrspace(1) %out) {
412435
; CHECK-LABEL: @wave64_ballot_i32_zero_negative(
413436
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0

0 commit comments

Comments (0)