Skip to content

Commit 42ce8b7

Browse files
committed
[MachineOutliner] Preserve regmasks in calls to outlined functions
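When emitting calls to an outlined function, the register masks from the outlined sequence are lost. The AArch64CollectLOH pass, which I plan to move to PreEmitPass2 (positioned after MachineOutliner), relies on accurate register masks. This patch ensures that regmasks are preserved on the outlined calls: if the outlined sequence contains a single distinct regmask, it is attached to the call as-is; if it contains several distinct regmasks, they are combined into one by bitwise AND and the combined mask is attached instead, maintaining the required accuracy for subsequent passes.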
1 parent 4cb2a51 commit 42ce8b7

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,7 @@ bool MachineOutliner::outline(
11171117
// instruction. It also updates call site information for moved
11181118
// code.
11191119
SmallSet<Register, 2> UseRegs, DefRegs;
1120+
SmallPtrSet<const uint32_t *, 2> RegMasks;
11201121
// Copy over the defs in the outlined range.
11211122
// First inst in outlined range <-- Anything that's defined in this
11221123
// ... .. range has to be added as an
@@ -1130,6 +1131,12 @@ bool MachineOutliner::outline(
11301131
MachineInstr *MI = &*Iter;
11311132
SmallSet<Register, 2> InstrUseRegs;
11321133
for (MachineOperand &MOP : MI->operands()) {
1134+
// Collect all regmasks. Merge them at the end.
1135+
if (MOP.isRegMask()) {
1136+
RegMasks.insert(MOP.getRegMask());
1137+
continue;
1138+
}
1139+
11331140
// Skip over anything that isn't a register.
11341141
if (!MOP.isReg())
11351142
continue;
@@ -1153,6 +1160,24 @@ bool MachineOutliner::outline(
11531160
MI->getMF()->eraseCallSiteInfo(MI);
11541161
}
11551162

1163+
if (!RegMasks.empty()) {
1164+
if (RegMasks.size() == 1) {
1165+
CallInst->addOperand(
1166+
MachineOperand::CreateRegMask(*RegMasks.begin()));
1167+
} else {
1168+
uint32_t *RegMask = MF->allocateRegMask();
1169+
unsigned NumRegs =
1170+
MF->getSubtarget().getRegisterInfo()->getNumRegs();
1171+
unsigned Size = MachineOperand::getRegMaskSize(NumRegs);
1172+
memset(RegMask, UINT32_MAX, Size * sizeof(RegMask[0]));
1173+
for (const uint32_t *Mask : RegMasks) {
1174+
for (unsigned I = 0; I < Size; ++I)
1175+
RegMask[I] &= Mask[I];
1176+
}
1177+
CallInst->addOperand(MachineOperand::CreateRegMask(RegMask));
1178+
}
1179+
}
1180+
11561181
for (const Register &I : DefRegs)
11571182
// If it's a def, add it to the call instruction.
11581183
CallInst->addOperand(
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# RUN: llc -mtriple=aarch64-apple-ios -run-pass=prologepilog -run-pass=machine-outliner %s -o - | FileCheck %s
2+
--- |
3+
declare swiftcc void @bar()
4+
declare void @baz(i32, i32, i32) #0
5+
6+
define void @test_same_regmask() #0 {
7+
ret void
8+
}
9+
define void @test_different_regmasks() #0 {
10+
ret void
11+
}
12+
define void @foo(i32, i32, i32, i32, i32, i32, i32, i32) #0 {
13+
ret void
14+
}
15+
16+
...
17+
---
18+
name: foo
19+
tracksRegLiveness: true
20+
body: |
21+
bb.0:
22+
RET undef $lr
23+
24+
25+
...
26+
---
27+
name: test_same_regmask
28+
tracksRegLiveness: true
29+
body: |
30+
; CHECK-LABEL: name: test_same_regmask
31+
; CHECK: bb.1:
32+
; CHECK-NEXT: BL @OUTLINED_FUNCTION_1, csr_aarch64_aapcs
33+
34+
bb.0:
35+
$sp = frame-setup SUBXri $sp, 16, 0
36+
37+
bb.1:
38+
$w0 = MOVZWi 1, 0
39+
$w1 = MOVZWi 2, 0
40+
$w2 = MOVZWi 3, 0
41+
BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
42+
$w0 = MOVZWi 1, 0
43+
$w1 = MOVZWi 2, 0
44+
$w2 = MOVZWi 3, 0
45+
BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit-def $sp
46+
$sp = ADDXri $sp, 16, 0
47+
RET undef $lr
48+
49+
50+
...
51+
---
52+
name: test_different_regmasks
53+
tracksRegLiveness: true
54+
body: |
55+
; CHECK-LABEL: name: test_different_regmasks
56+
; CHECK: bb.1:
57+
; CHECK-NEXT: BL @OUTLINED_FUNCTION_0, CustomRegMask($fp,$lr,$wzr,$wzr_hi,$xzr,$b8,$b9,$b10,$b11,$b12,$b13,$b14,$b15,$d8,$d9,$d10,$d11,$d12,$d13,$d14,$d15,$h8,$h9,$h10,$h11,$h12,$h13,$h14,$h15,$s8,$s9,$s10,$s11,$s12,$s13,$s14,$s15,$w19,$w20,$w22,$w23,$w24,$w25,$w26,$w27,$w28,$w29,$w30,$x19,$x20,$x22,$x23,$x24,$x25,$x26,$x27,$x28,$b8_hi,$b9_hi,$b10_hi,$b11_hi,$b12_hi,$b13_hi,$b14_hi,$b15_hi,$h8_hi,$h9_hi,$h10_hi,$h11_hi,$h12_hi,$h13_hi,$h14_hi,$h15_hi,$s8_hi,$s9_hi,$s10_hi,$s11_hi,$s12_hi,$s13_hi,$s14_hi,$s15_hi,$w19_hi,$w20_hi,$w22_hi,$w23_hi,$w24_hi,$w25_hi,$w26_hi,$w27_hi,$w28_hi,$w29_hi,$w30_hi,$d8_d9,$d9_d10,$d10_d11,$d11_d12,$d12_d13,$d13_d14,$d14_d15,$d8_d9_d10_d11,$d9_d10_d11_d12,$d10_d11_d12_d13,$d11_d12_d13_d14,$d12_d13_d14_d15,$d8_d9_d10,$d9_d10_d11,$d10_d11_d12,$d11_d12_d13,$d12_d13_d14,$d13_d14_d15,$x22_x23_x24_x25_x26_x27_x28_fp,$w22_w23,$w24_w25,$w26_w27,$w28_w29,$x28_fp,$x22_x23,$x24_x25,$x26_x27)
58+
59+
bb.0:
60+
$sp = frame-setup SUBXri $sp, 16, 0
61+
62+
bb.1:
63+
$w0 = MOVZWi 1, 0
64+
$w1 = MOVZWi 2, 0
65+
$w2 = MOVZWi 3, 0
66+
$w3 = MOVZWi 4, 0
67+
$w4 = MOVZWi 5, 0
68+
$w5 = MOVZWi 6, 0
69+
$w6 = MOVZWi 7, 0
70+
$w7 = MOVZWi 8, 0
71+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
72+
BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
73+
$w0 = MOVZWi 1, 0
74+
$w1 = MOVZWi 2, 0
75+
$w2 = MOVZWi 3, 0
76+
$w3 = MOVZWi 4, 0
77+
$w4 = MOVZWi 5, 0
78+
$w5 = MOVZWi 6, 0
79+
$w6 = MOVZWi 7, 0
80+
$w7 = MOVZWi 8, 0
81+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
82+
BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
83+
$w0 = MOVZWi 1, 0
84+
$w1 = MOVZWi 2, 0
85+
$w2 = MOVZWi 3, 0
86+
$w3 = MOVZWi 4, 0
87+
$w4 = MOVZWi 5, 0
88+
$w5 = MOVZWi 6, 0
89+
$w6 = MOVZWi 7, 0
90+
$w7 = MOVZWi 8, 0
91+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7, implicit-def $sp
92+
BL @bar, csr_aarch64_aapcs_swifterror, implicit-def dead $lr, implicit $sp, implicit-def $sp
93+
$sp = ADDXri $sp, 16, 0
94+
RET undef $lr

0 commit comments

Comments
 (0)