Skip to content

Commit dcedcbf

Browse files
authored
Do not consider inlined function bodies for more inlining (#11462)
Due to our bottom-up, SCC-based approach to inlining, we've already performed inlining on the callee, and do not need to consider any further inlining within it. This also prevents us from inlining recursive callees all the way up to the size threshold, and instead just inline one layer.
1 parent 3fab18f commit dcedcbf

File tree

5 files changed

+108
-7348
lines changed

5 files changed

+108
-7348
lines changed

cranelift/codegen/src/inline.rs

Lines changed: 73 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,19 @@ type SmallBlockCallVec = SmallVec<[ir::BlockCall; 8]>;
3737
pub enum InlineCommand<'a> {
3838
/// Keep the call as-is, out-of-line, and do not inline the callee.
3939
KeepCall,
40+
4041
/// Inline the call, using this function as the body of the callee.
4142
///
4243
/// It is the `Inline` implementor's responsibility to ensure that this
4344
/// function is the correct callee. Providing the wrong function may result
4445
/// in panics during compilation or incorrect runtime behavior.
45-
Inline(Cow<'a, ir::Function>),
46+
Inline {
47+
/// The callee function's body.
48+
callee: Cow<'a, ir::Function>,
49+
/// Whether to visit any function calls within the callee body after
50+
/// inlining and consider them for further inlining.
51+
visit_callee: bool,
52+
},
4653
}
4754

4855
/// A trait for directing Cranelift whether to inline a particular call or not.
@@ -112,37 +119,45 @@ pub(crate) fn do_inlining(
112119
let mut allocs = InliningAllocs::default();
113120

114121
let mut cursor = FuncCursor::new(func);
115-
while let Some(block) = cursor.next_block() {
116-
// Always keep track of our previous cursor position. After we inline a
117-
// call, replacing the current position with an arbitrary sub-CFG, we
118-
// back up to this previous position. This makes sure our cursor is
119-
// always at a position that is inserted in the layout and also enables
120-
// multi-level inlining, if desired by the user, where we consider any
121-
// newly-inlined call instructions for further inlining.
122+
'block_loop: while let Some(block) = cursor.next_block() {
123+
// Always keep track of our previous cursor position. Assuming that the
124+
// current position is a function call that we will inline, then the
125+
// previous position is just before the inlined callee function. After
126+
// inlining a call, the Cranelift user can decide whether to consider
127+
// any function calls in the inlined callee for further inlining or
128+
// not. When they do, then we back up to this previous cursor position
129+
// so that our traversal will then continue over the inlined body.
122130
let mut prev_pos;
123131

124132
while let Some(inst) = {
125133
prev_pos = cursor.position();
126134
cursor.next_inst()
127135
} {
136+
// Make sure that `block` is always `inst`'s block, even with all of
137+
// our cursor-position-updating and block-splitting-during-inlining
138+
// shenanigans below.
139+
debug_assert_eq!(Some(block), cursor.func.layout.inst_block(inst));
140+
128141
match cursor.func.dfg.insts[inst] {
129142
ir::InstructionData::Call {
130143
opcode: opcode @ ir::Opcode::Call | opcode @ ir::Opcode::ReturnCall,
131144
args: _,
132145
func_ref,
133146
} => {
134-
let args = cursor.func.dfg.inst_args(inst);
135147
trace!(
136148
"considering call site for inlining: {inst}: {}",
137149
cursor.func.dfg.display_inst(inst),
138150
);
151+
let args = cursor.func.dfg.inst_args(inst);
139152
match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
140153
InlineCommand::KeepCall => {
141154
trace!(" --> keeping call");
142-
continue;
143155
}
144-
InlineCommand::Inline(callee) => {
145-
inline_one(
156+
InlineCommand::Inline {
157+
callee,
158+
visit_callee,
159+
} => {
160+
let last_inlined_block = inline_one(
146161
&mut allocs,
147162
cursor.func,
148163
func_ref,
@@ -153,7 +168,15 @@ pub(crate) fn do_inlining(
153168
None,
154169
);
155170
inlined_any = true;
156-
cursor.set_position(prev_pos);
171+
if visit_callee {
172+
cursor.set_position(prev_pos);
173+
} else {
174+
// Arrange it so that the `next_block()` loop
175+
// will continue to the next block that is not
176+
// associated with the just-inlined callee.
177+
cursor.goto_bottom(last_inlined_block);
178+
continue 'block_loop;
179+
}
157180
}
158181
}
159182
}
@@ -163,18 +186,20 @@ pub(crate) fn do_inlining(
163186
func_ref,
164187
exception,
165188
} => {
166-
let args = cursor.func.dfg.inst_args(inst);
167189
trace!(
168190
"considering call site for inlining: {inst}: {}",
169191
cursor.func.dfg.display_inst(inst),
170192
);
193+
let args = cursor.func.dfg.inst_args(inst);
171194
match inliner.inline(&cursor.func, inst, opcode, func_ref, args) {
172195
InlineCommand::KeepCall => {
173196
trace!(" --> keeping call");
174-
continue;
175197
}
176-
InlineCommand::Inline(callee) => {
177-
inline_one(
198+
InlineCommand::Inline {
199+
callee,
200+
visit_callee,
201+
} => {
202+
let last_inlined_block = inline_one(
178203
&mut allocs,
179204
cursor.func,
180205
func_ref,
@@ -185,11 +210,30 @@ pub(crate) fn do_inlining(
185210
Some(exception),
186211
);
187212
inlined_any = true;
188-
cursor.set_position(prev_pos);
213+
if visit_callee {
214+
cursor.set_position(prev_pos);
215+
} else {
216+
// Arrange it so that the `next_block()` loop
217+
// will continue to the next block that is not
218+
// associated with the just-inlined callee.
219+
cursor.goto_bottom(last_inlined_block);
220+
continue 'block_loop;
221+
}
189222
}
190223
}
191224
}
192-
_ => continue,
225+
ir::InstructionData::CallIndirect { .. }
226+
| ir::InstructionData::TryCallIndirect { .. } => {
227+
// Can't inline indirect calls; need to have some earlier
228+
// pass rewrite them into direct calls first, when possible.
229+
}
230+
_ => {
231+
debug_assert!(
232+
!cursor.func.dfg.insts[inst].opcode().is_call(),
233+
"should have matched all call instructions, but found: {inst}: {}",
234+
cursor.func.dfg.display_inst(inst),
235+
);
236+
}
193237
}
194238
}
195239
}
@@ -283,6 +327,8 @@ impl InliningAllocs {
283327
}
284328

285329
/// Inline one particular function call.
330+
///
331+
/// Returns the last inlined block in the layout.
286332
fn inline_one(
287333
allocs: &mut InliningAllocs,
288334
func: &mut ir::Function,
@@ -292,7 +338,7 @@ fn inline_one(
292338
call_opcode: ir::Opcode,
293339
callee: &ir::Function,
294340
call_exception_table: Option<ir::ExceptionTable>,
295-
) {
341+
) -> ir::Block {
296342
trace!(
297343
"Inlining call {call_inst:?}: {}\n\
298344
with callee = {callee:?}",
@@ -318,7 +364,7 @@ fn inline_one(
318364
// Prepare for translating the actual instructions by inserting the inlined
319365
// blocks into the caller's layout in the same order that they appear in the
320366
// callee.
321-
inline_block_layout(func, call_block, callee, &entity_map);
367+
let last_inlined_block = inline_block_layout(func, call_block, callee, &entity_map);
322368

323369
// Translate each instruction from the callee into the caller,
324370
// appending them to their associated block in the caller.
@@ -470,6 +516,8 @@ fn inline_one(
470516
if let Some(call_exception_table) = call_exception_table {
471517
fixup_inlined_call_exception_tables(allocs, func, call_exception_table);
472518
}
519+
520+
last_inlined_block
473521
}
474522

475523
/// Append stack map entries from the caller and callee to the given inlined
@@ -916,12 +964,14 @@ impl<'a> ir::instructions::InstructionMapper for InliningInstRemapper<'a> {
916964
}
917965

918966
/// Inline the callee's layout into the caller's layout.
967+
///
968+
/// Returns the last inlined block in the layout.
919969
fn inline_block_layout(
920970
func: &mut ir::Function,
921971
call_block: ir::Block,
922972
callee: &ir::Function,
923973
entity_map: &EntityMap,
924-
) {
974+
) -> ir::Block {
925975
// Iterate over callee blocks in layout order, inserting their associated
926976
// inlined block into the caller's layout.
927977
let mut prev_inlined_block = call_block;
@@ -934,6 +984,7 @@ fn inline_block_layout(
934984
prev_inlined_block = inlined_block;
935985
next_callee_block = callee.layout.next_block(callee_block);
936986
}
987+
prev_inlined_block
937988
}
938989

939990
/// Split the call instruction's block just after the call instruction to create

cranelift/filetests/src/test_inline.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,18 @@ impl<'a> Inline for Inliner<'a> {
136136
.and_then(|name| self.0.get(&ir::UserFuncName::User(name.clone())))
137137
{
138138
None => InlineCommand::KeepCall,
139-
Some(f) => InlineCommand::Inline(Cow::Borrowed(f)),
139+
Some(f) => InlineCommand::Inline {
140+
callee: Cow::Borrowed(f),
141+
visit_callee: true,
142+
},
140143
},
141144
ir::ExternalName::TestCase(name) => {
142145
match self.0.get(&ir::UserFuncName::Testcase(name.clone())) {
143146
None => InlineCommand::KeepCall,
144-
Some(f) => InlineCommand::Inline(Cow::Borrowed(f)),
147+
Some(f) => InlineCommand::Inline {
148+
callee: Cow::Borrowed(f),
149+
visit_callee: true,
150+
},
145151
}
146152
}
147153
ir::ExternalName::LibCall(_) | ir::ExternalName::KnownSymbol(_) => {

crates/cranelift/src/compiler.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -845,9 +845,13 @@ impl InliningCompiler for Compiler {
845845
.code
846846
.downcast_ref::<Option<CompilerContext>>()
847847
.unwrap();
848-
InlineCommand::Inline(Cow::Borrowed(
849-
&cx.as_ref().unwrap().codegen_context.func,
850-
))
848+
InlineCommand::Inline {
849+
callee: Cow::Borrowed(&cx.as_ref().unwrap().codegen_context.func),
850+
// We've already visited the callee for inlining
851+
// due to our bottom-up approach, no need to
852+
// visit it again.
853+
visit_callee: false,
854+
}
851855
}
852856
},
853857
_ => InlineCommand::KeepCall,

tests/disas/component-model/inlining-bug.wat

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,6 @@
4646
;; gv8 = vmctx
4747
;; gv9 = load.i64 notrap aligned readonly gv8+8
4848
;; gv10 = load.i64 notrap aligned gv9+16
49-
;; gv11 = vmctx
50-
;; gv12 = load.i64 notrap aligned readonly gv11+8
51-
;; gv13 = load.i64 notrap aligned gv12+16
5249
;; sig0 = (i64 vmctx, i64) -> i32 tail
5350
;; sig1 = (i64 vmctx, i64) -> i32 tail
5451
;; sig2 = (i64 vmctx, i64) tail
@@ -64,15 +61,9 @@
6461
;; jump block4
6562
;;
6663
;; block4:
67-
;; jump block7
68-
;;
69-
;; block7:
70-
;; jump block8
71-
;;
72-
;; block8:
73-
;; jump block9
74-
;;
75-
;; block9:
64+
;; @00d4 v4 = load.i64 notrap aligned readonly can_move v0+64
65+
;; v10 = load.i64 notrap aligned readonly can_move v4+88
66+
;; call fn2(v10, v10)
7667
;; jump block5
7768
;;
7869
;; block5:
@@ -82,9 +73,9 @@
8273
;; jump block3
8374
;;
8475
;; block3:
85-
;; jump block10
76+
;; jump block7
8677
;;
87-
;; block10:
78+
;; block7:
8879
;; @00d6 jump block1
8980
;;
9081
;; block1:

0 commit comments

Comments
 (0)