Skip to content

Commit 745e1e6

Browse files
wenju-hearsenm
andauthored
[CodeGen] Ignore requiresStructuredCFG check in canSplitCriticalEdge if successor is loop header (#154063)
This addresses a performance issue for our downstream GPU target that sets requiresStructuredCFG to true. The issue is that EarlyMachineLICM pass does not hoist loop invariants because a critical edge is not split. The critical edge's destination a loop header. Splitting the critical edge will not break structured CFG. Add a nvptx test to demonstrate the issue since the target also requires structured CFG. --------- Co-authored-by: Matt Arsenault <[email protected]>
1 parent d357e96 commit 745e1e6

File tree

3 files changed

+96
-5
lines changed

3 files changed

+96
-5
lines changed

llvm/include/llvm/CodeGen/MachineBasicBlock.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1040,7 +1040,9 @@ class MachineBasicBlock
10401040
/// Succ, can be split. If this returns true a subsequent call to
10411041
/// SplitCriticalEdge is guaranteed to return a valid basic block if
10421042
/// no changes occurred in the meantime.
1043-
LLVM_ABI bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
1043+
LLVM_ABI bool
1044+
canSplitCriticalEdge(const MachineBasicBlock *Succ,
1045+
const MachineLoopInfo *MLI = nullptr) const;
10441046

10451047
void pop_front() { Insts.pop_front(); }
10461048
void pop_back() { Insts.pop_back(); }

llvm/lib/CodeGen/MachineBasicBlock.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,7 +1180,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
11801180
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
11811181
MachineBasicBlock *Succ, const SplitCriticalEdgeAnalyses &Analyses,
11821182
std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) {
1183-
if (!canSplitCriticalEdge(Succ))
1183+
if (!canSplitCriticalEdge(Succ, Analyses.MLI))
11841184
return nullptr;
11851185

11861186
MachineFunction *MF = getParent();
@@ -1408,8 +1408,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
14081408
return NMBB;
14091409
}
14101410

1411-
bool MachineBasicBlock::canSplitCriticalEdge(
1412-
const MachineBasicBlock *Succ) const {
1411+
bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
1412+
const MachineLoopInfo *MLI) const {
14131413
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
14141414
// it in this generic function.
14151415
if (Succ->isEHPad())
@@ -1423,8 +1423,17 @@ bool MachineBasicBlock::canSplitCriticalEdge(
14231423
const MachineFunction *MF = getParent();
14241424
// Performance might be harmed on HW that implements branching using exec mask
14251425
// where both sides of the branches are always executed.
1426-
if (MF->getTarget().requiresStructuredCFG())
1426+
1427+
if (MF->getTarget().requiresStructuredCFG()) {
1428+
// If `Succ` is a loop header, splitting the critical edge will not
1429+
// break structured CFG.
1430+
if (MLI) {
1431+
const MachineLoop *L = MLI->getLoopFor(Succ);
1432+
return L && L->getHeader() == Succ;
1433+
}
1434+
14271435
return false;
1436+
}
14281437

14291438
// Do we have an Indirect jump with a jumptable that we can rewrite?
14301439
int JTI = findJumpTableIndex(*this);
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=nvptx64 -mcpu=sm_20 -run-pass=early-machinelicm %s -o - | FileCheck %s
3+
4+
# This test checks that the early-machineLICM pass successfully creates a new
5+
# loop preheader by splitting the critical edge and hoisting the loop invariant
6+
# value `%8` to the preheader.
7+
# Since the critical edge successor is a loop header, the splitting does not
8+
# break the structured CFG, which is a requirement for the NVPTX target.
9+
10+
---
11+
name: test_hoist
12+
tracksRegLiveness: true
13+
registers:
14+
- { id: 0, class: b64, preferred-register: '', flags: [ ] }
15+
- { id: 1, class: b32, preferred-register: '', flags: [ ] }
16+
- { id: 2, class: b32, preferred-register: '', flags: [ ] }
17+
- { id: 3, class: b32, preferred-register: '', flags: [ ] }
18+
- { id: 4, class: b32, preferred-register: '', flags: [ ] }
19+
- { id: 5, class: b32, preferred-register: '', flags: [ ] }
20+
- { id: 6, class: b64, preferred-register: '', flags: [ ] }
21+
- { id: 7, class: b1, preferred-register: '', flags: [ ] }
22+
- { id: 8, class: b32, preferred-register: '', flags: [ ] }
23+
- { id: 9, class: b1, preferred-register: '', flags: [ ] }
24+
body: |
25+
; CHECK-LABEL: name: test_hoist
26+
; CHECK: bb.0.entry:
27+
; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.3(0x50000000)
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
30+
; CHECK-NEXT: [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
31+
; CHECK-NEXT: [[ADD64ri:%[0-9]+]]:b64 = nuw ADD64ri killed [[LD_i64_]], 2
32+
; CHECK-NEXT: [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, [[ADD64ri]], 0
33+
; CHECK-NEXT: [[SETP_i32ri:%[0-9]+]]:b1 = SETP_i32ri [[LD_i32_]], 0, 0
34+
; CHECK-NEXT: CBranch killed [[SETP_i32ri]], %bb.2
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: bb.3:
37+
; CHECK-NEXT: successors: %bb.1(0x80000000)
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:b32 = ADD32ri [[LD_i32_]], -1
40+
; CHECK-NEXT: {{ $}}
41+
; CHECK-NEXT: bb.1:
42+
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
43+
; CHECK-NEXT: {{ $}}
44+
; CHECK-NEXT: [[PHI:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.3, %3, %bb.1
45+
; CHECK-NEXT: [[SREM32rr:%[0-9]+]]:b32 = SREM32rr [[PHI]], [[ADD32ri]]
46+
; CHECK-NEXT: [[SETP_i32ri1:%[0-9]+]]:b1 = SETP_i32ri [[SREM32rr]], 0, 1
47+
; CHECK-NEXT: CBranch killed [[SETP_i32ri1]], %bb.1
48+
; CHECK-NEXT: GOTO %bb.2
49+
; CHECK-NEXT: {{ $}}
50+
; CHECK-NEXT: bb.2:
51+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.0, [[SREM32rr]], %bb.1
52+
; CHECK-NEXT: ST_i32 [[PHI1]], 0, 0, 1, 32, [[ADD64ri]], 0
53+
; CHECK-NEXT: Return
54+
bb.0.entry:
55+
successors: %bb.2(0x30000000), %bb.1(0x50000000)
56+
57+
%5:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
58+
%6:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
59+
%0:b64 = nuw ADD64ri killed %6, 2
60+
%1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
61+
%7:b1 = SETP_i32ri %5, 0, 0
62+
CBranch killed %7, %bb.2
63+
GOTO %bb.1
64+
65+
66+
bb.1:
67+
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
68+
69+
%2:b32 = PHI %1, %bb.0, %3, %bb.1
70+
%8:b32 = ADD32ri %5, -1
71+
%3:b32 = SREM32rr %2, %8
72+
%9:b1 = SETP_i32ri %3, 0, 1
73+
CBranch killed %9, %bb.1
74+
GOTO %bb.2
75+
76+
bb.2:
77+
%4:b32 = PHI %1, %bb.0, %3, %bb.1
78+
ST_i32 %4, 0, 0, 1, 32, %0, 0
79+
Return
80+
...

0 commit comments

Comments
 (0)