Skip to content

Commit bd21095

Browse files
authored
[MachineBasicBlock] Don't split loop header successor if the terminator is unanalyzable (#170146)
Fixes #170051. The previous implementation allowed splitting the critical edge to the successor whenever the successor was a loop header, regardless of whether the terminator of `this` is analyzable.
1 parent 58d74fe commit bd21095

File tree

3 files changed

+263
-8
lines changed

3 files changed

+263
-8
lines changed

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,14 +1425,14 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
14251425
// where both sides of the branches are always executed.
14261426

14271427
if (MF->getTarget().requiresStructuredCFG()) {
1428-
// If `Succ` is a loop header, splitting the critical edge will not
1429-
// break structured CFG.
1430-
if (MLI) {
1431-
const MachineLoop *L = MLI->getLoopFor(Succ);
1432-
return L && L->getHeader() == Succ;
1433-
}
1434-
1435-
return false;
1428+
if (!MLI)
1429+
return false;
1430+
const MachineLoop *L = MLI->getLoopFor(Succ);
1431+
// Splitting the critical edge does not break structured CFG only if `Succ`
1432+
// is a loop header. In that case, fall through to check whether this
1433+
// block's terminator is analyzable.
1434+
if (!L || L->getHeader() != Succ)
1435+
return false;
14361436
}
14371437

14381438
// Do we have an Indirect jump with a jumptable that we can rewrite?
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -o - %s -passes="require<machine-loops>,require<live-vars>,phi-node-elimination" | FileCheck %s
3+
4+
--- |
5+
target datalayout = "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64"
6+
target triple = "nvptx64-unknown-nvidiacl"
7+
8+
define void @func_26(i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.) {
9+
entry:
10+
br label %for.cond
11+
12+
for.cond: ; preds = %BS_LABEL_1, %BS_LABEL_1, %entry
13+
%p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ]
14+
br label %BS_LABEL_1
15+
16+
BS_LABEL_2: ; preds = %BS_LABEL_1
17+
%sub = or i32 %p_2218_0.3, 1
18+
br label %for.cond4
19+
20+
for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2
21+
%p_2218_0.2 = phi i32 [ %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
22+
br label %BS_LABEL_1
23+
24+
BS_LABEL_1: ; preds = %for.cond4, %for.cond
25+
%p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
26+
switch i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., label %unreachable [
27+
i32 0, label %for.cond4
28+
i32 4, label %BS_LABEL_2
29+
i32 1, label %for.cond
30+
i32 6, label %for.cond
31+
]
32+
33+
unreachable: ; preds = %BS_LABEL_1
34+
call void asm sideeffect "exit;", ""()
35+
unreachable
36+
}
37+
...
38+
---
39+
name: func_26
40+
alignment: 1
41+
exposesReturnsTwice: false
42+
legalized: false
43+
regBankSelected: false
44+
selected: false
45+
failedISel: false
46+
tracksRegLiveness: true
47+
hasWinCFI: false
48+
noPhis: false
49+
isSSA: true
50+
noVRegs: false
51+
hasFakeUses: false
52+
callsEHReturn: false
53+
callsUnwindInit: false
54+
hasEHContTarget: false
55+
hasEHScopes: false
56+
hasEHFunclets: false
57+
isOutlined: false
58+
debugInstrRef: false
59+
failsVerification: false
60+
tracksDebugUserValues: false
61+
registers:
62+
- { id: 0, class: b32, preferred-register: '', flags: [ ] }
63+
- { id: 1, class: b32, preferred-register: '', flags: [ ] }
64+
- { id: 2, class: b32, preferred-register: '', flags: [ ] }
65+
- { id: 3, class: b32, preferred-register: '', flags: [ ] }
66+
- { id: 4, class: b32, preferred-register: '', flags: [ ] }
67+
- { id: 5, class: b32, preferred-register: '', flags: [ ] }
68+
- { id: 6, class: b32, preferred-register: '', flags: [ ] }
69+
- { id: 7, class: b1, preferred-register: '', flags: [ ] }
70+
- { id: 8, class: b32, preferred-register: '', flags: [ ] }
71+
- { id: 9, class: b1, preferred-register: '', flags: [ ] }
72+
- { id: 10, class: b32, preferred-register: '', flags: [ ] }
73+
- { id: 11, class: b1, preferred-register: '', flags: [ ] }
74+
liveins: []
75+
frameInfo:
76+
isFrameAddressTaken: false
77+
isReturnAddressTaken: false
78+
hasStackMap: false
79+
hasPatchPoint: false
80+
stackSize: 0
81+
offsetAdjustment: 0
82+
maxAlignment: 1
83+
adjustsStack: false
84+
hasCalls: false
85+
stackProtector: ''
86+
functionContext: ''
87+
maxCallFrameSize: 4294967295
88+
cvBytesOfCalleeSavedRegisters: 0
89+
hasOpaqueSPAdjustment: false
90+
hasVAStart: false
91+
hasMustTailInVarArgFunc: false
92+
hasTailCall: false
93+
isCalleeSavedInfoValid: false
94+
localFrameSize: 0
95+
fixedStack: []
96+
stack: []
97+
entry_values: []
98+
callSites: []
99+
debugValueSubstitutions: []
100+
constants: []
101+
machineFunctionInfo: {}
102+
jumpTable:
103+
kind: inline
104+
entries:
105+
- id: 0
106+
blocks: [ '%bb.3', '%bb.1', '%bb.6', '%bb.6', '%bb.2', '%bb.6',
107+
'%bb.1' ]
108+
body: |
109+
; CHECK-LABEL: name: func_26
110+
; CHECK: bb.0:
111+
; CHECK-NEXT: successors: %bb.1(0x80000000)
112+
; CHECK-NEXT: {{ $}}
113+
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:b32 = IMPLICIT_DEF
114+
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:b1 = IMPLICIT_DEF
115+
; CHECK-NEXT: {{ $}}
116+
; CHECK-NEXT: bb.1:
117+
; CHECK-NEXT: successors: %bb.4(0x80000000)
118+
; CHECK-NEXT: {{ $}}
119+
; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:b32 = IMPLICIT_DEF
120+
; CHECK-NEXT: GOTO %bb.4
121+
; CHECK-NEXT: {{ $}}
122+
; CHECK-NEXT: bb.2:
123+
; CHECK-NEXT: successors: %bb.3(0x80000000)
124+
; CHECK-NEXT: {{ $}}
125+
; CHECK-NEXT: bb.3:
126+
; CHECK-NEXT: successors: %bb.4(0x80000000)
127+
; CHECK-NEXT: {{ $}}
128+
; CHECK-NEXT: bb.4:
129+
; CHECK-NEXT: successors: %bb.6(0x00000000), %bb.5(0x80000000)
130+
; CHECK-NEXT: {{ $}}
131+
; CHECK-NEXT: CBranch undef [[DEF1]], %bb.6
132+
; CHECK-NEXT: {{ $}}
133+
; CHECK-NEXT: bb.5:
134+
; CHECK-NEXT: successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
135+
; CHECK-NEXT: {{ $}}
136+
; CHECK-NEXT: BRX_START 0
137+
; CHECK-NEXT: BRX_ITEM %bb.3
138+
; CHECK-NEXT: BRX_ITEM %bb.1
139+
; CHECK-NEXT: BRX_ITEM %bb.6
140+
; CHECK-NEXT: BRX_ITEM %bb.6
141+
; CHECK-NEXT: BRX_ITEM %bb.2
142+
; CHECK-NEXT: BRX_ITEM %bb.6
143+
; CHECK-NEXT: BRX_END %bb.1, undef [[DEF]], 0
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: bb.6:
146+
bb.0:
147+
successors: %bb.1(0x80000000)
148+
149+
%10:b32 = IMPLICIT_DEF
150+
%11:b1 = IMPLICIT_DEF
151+
152+
bb.1:
153+
successors: %bb.4(0x80000000)
154+
155+
%0:b32 = PHI undef %10, %bb.0, undef %0, %bb.5
156+
GOTO %bb.4
157+
158+
bb.2:
159+
successors: %bb.3(0x80000000)
160+
161+
bb.3:
162+
successors: %bb.4(0x80000000)
163+
164+
bb.4:
165+
successors: %bb.6(0x00000000), %bb.5(0x80000000)
166+
167+
CBranch undef %11, %bb.6
168+
169+
bb.5:
170+
successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000)
171+
172+
BRX_START 0
173+
BRX_ITEM %bb.3
174+
BRX_ITEM %bb.1
175+
BRX_ITEM %bb.6
176+
BRX_ITEM %bb.6
177+
BRX_ITEM %bb.2
178+
BRX_ITEM %bb.6
179+
BRX_END %bb.1, undef %10, 0
180+
181+
bb.6:
182+
...

llvm/test/CodeGen/NVPTX/switch.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
3+
4+
target triple = "nvptx64-unknown-nvidiacl"
5+
6+
define void @pr170051(i32 %cond) {
7+
; CHECK-LABEL: pr170051(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .pred %p<2>;
10+
; CHECK-NEXT: .reg .b32 %r<4>;
11+
; CHECK-EMPTY:
12+
; CHECK-NEXT: // %bb.0: // %entry
13+
; CHECK-NEXT: mov.b32 %r2, 0;
14+
; CHECK-NEXT: ld.param.b32 %r1, [pr170051_param_0];
15+
; CHECK-NEXT: setp.gt.u32 %p1, %r1, 6;
16+
; CHECK-NEXT: bra.uni $L__BB0_3;
17+
; CHECK-NEXT: $L__BB0_1: // %BS_LABEL_2
18+
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
19+
; CHECK-NEXT: or.b32 %r3, %r2, 1;
20+
; CHECK-NEXT: $L__BB0_2: // %for.cond4
21+
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
22+
; CHECK-NEXT: mov.b32 %r2, %r3;
23+
; CHECK-NEXT: $L__BB0_3: // %BS_LABEL_1
24+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
25+
; CHECK-NEXT: @%p1 bra $L__BB0_5;
26+
; CHECK-NEXT: // %bb.4: // %BS_LABEL_1
27+
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
28+
; CHECK-NEXT: mov.b32 %r3, %r1;
29+
; CHECK-NEXT: $L_brx_0: .branchtargets
30+
; CHECK-NEXT: $L__BB0_2,
31+
; CHECK-NEXT: $L__BB0_3,
32+
; CHECK-NEXT: $L__BB0_5,
33+
; CHECK-NEXT: $L__BB0_5,
34+
; CHECK-NEXT: $L__BB0_1,
35+
; CHECK-NEXT: $L__BB0_5,
36+
; CHECK-NEXT: $L__BB0_3;
37+
; CHECK-NEXT: brx.idx %r1, $L_brx_0;
38+
; CHECK-NEXT: $L__BB0_5: // %unreachable
39+
; CHECK-NEXT: // begin inline asm
40+
; CHECK-NEXT: exit;
41+
; CHECK-NEXT: // end inline asm
42+
entry:
43+
br label %for.cond
44+
45+
for.cond: ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry
46+
%p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ]
47+
br label %BS_LABEL_1
48+
49+
BS_LABEL_2: ; preds = %BS_LABEL_1
50+
%sub = or i32 %p_2218_0.3, 1
51+
br label %for.cond4
52+
53+
for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2
54+
%p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ]
55+
br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1
56+
57+
for.cond4.for.cond_crit_edge: ; preds = %for.cond4
58+
br label %for.cond
59+
60+
BS_LABEL_1: ; preds = %for.cond4, %for.cond
61+
%p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ]
62+
switch i32 %cond, label %unreachable [
63+
i32 0, label %for.cond4
64+
i32 4, label %BS_LABEL_2
65+
i32 1, label %for.cond
66+
i32 6, label %for.cond
67+
]
68+
69+
unreachable: ; preds = %BS_LABEL_1
70+
unreachable
71+
}
72+
73+

0 commit comments

Comments
 (0)