Skip to content

Commit 70a9e76

Browse files
authored
[AMDGPU] Support xor cond, -1 when lowering BRCOND (#160341)
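A BRCOND whose condition is `xor cond, -1` can reach lowering when the DAG combiner has not folded the `xor` away (for example, when DAG combining is disabled).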
1 parent d27654f commit 70a9e76

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7540,17 +7540,30 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, SelectionDAG &DAG) const {
75407540
SDNode *BR = nullptr;
75417541
SDNode *SetCC = nullptr;
75427542

7543-
if (Intr->getOpcode() == ISD::SETCC) {
7543+
switch (Intr->getOpcode()) {
7544+
case ISD::SETCC: {
75447545
// As long as we negate the condition everything is fine
75457546
SetCC = Intr;
75467547
Intr = SetCC->getOperand(0).getNode();
7547-
7548-
} else {
7548+
break;
7549+
}
7550+
case ISD::XOR: {
7551+
// Similar to SETCC, if we have (xor c, -1), we will be fine.
7552+
SDValue LHS = Intr->getOperand(0);
7553+
SDValue RHS = Intr->getOperand(1);
7554+
if (auto *C = dyn_cast<ConstantSDNode>(RHS); C && C->getZExtValue()) {
7555+
Intr = LHS.getNode();
7556+
break;
7557+
}
7558+
[[fallthrough]];
7559+
}
7560+
default: {
75497561
// Get the target from BR if we don't negate the condition
75507562
BR = findUser(BRCOND, ISD::BR);
75517563
assert(BR && "brcond missing unconditional branch user");
75527564
Target = BR->getOperand(1);
75537565
}
7566+
}
75547567

75557568
unsigned CFNode = isCFIntrinsic(Intr);
75567569
if (CFNode == 0) {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --debug-counter=dagcombine=0 -start-before=si-annotate-control-flow %s -o - | FileCheck %s
3+
4+
define amdgpu_kernel void @test(i32 %N, ptr addrspace(1) %p) {
5+
; CHECK-LABEL: test:
6+
; CHECK: ; %bb.0: ; %entry
7+
; CHECK-NEXT: v_and_b32_e32 v0, 0x3ff, v0
8+
; CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 1, v0
9+
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc
10+
; CHECK-NEXT: s_endpgm
11+
entry:
12+
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
13+
%cmp2 = icmp slt i32 %id.x, 1
14+
br i1 %cmp2, label %if.then, label %exit
15+
16+
if.then:
17+
%idx.ext = zext i32 %N to i64
18+
%add.ptr = getelementptr i8, ptr addrspace(1) %p, i64 %idx.ext
19+
ret void
20+
21+
exit:
22+
ret void
23+
}

0 commit comments

Comments
 (0)