Mark a handful of functions on the calling-into-Wasm path as #[inline]

fitzgen · fitzgen · commit f09764e1529e · 2025-04-22T14:42:41.000-07:00
This provides an improvement across the board for our `sync/no-hook` benchmarks:

<details>

<summary>Benchmark Results</summary>

```
$ cargo bench --profile profiling --bench call '\bsync/no-hook' -- --baseline main
    Finished `profiling` profile [optimized + debuginfo] target(s) in 0.28s
     Running benches/call.rs (target/profiling/deps/call-b0a2bedd3336ad76)
sync/no-hook/core - host-to-wasm - typed - nop
                        time:   [27.334 ns 27.499 ns 27.668 ns]
                        change: [-16.388% -14.870% -13.479%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  5 (5.00%) high mild
  2 (2.00%) high severe
sync/no-hook/core - host-to-wasm - untyped - nop
                        time:   [44.141 ns 44.429 ns 44.757 ns]
                        change: [-18.380% -17.041% -15.670%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  1 (1.00%) high mild
  2 (2.00%) high severe
sync/no-hook/core - host-to-wasm - unchecked - nop
                        time:   [29.731 ns 29.983 ns 30.262 ns]
                        change: [-25.104% -22.176% -19.159%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  5 (5.00%) high mild
  2 (2.00%) high severe
sync/no-hook/core - host-to-wasm - typed - nop-params-and-results
                        time:   [28.990 ns 29.143 ns 29.303 ns]
                        change: [-25.804% -24.562% -23.372%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  3 (3.00%) high mild
  3 (3.00%) high severe
sync/no-hook/core - host-to-wasm - untyped - nop-params-and-results
                        time:   [110.00 ns 110.65 ns 111.46 ns]
                        change: [-11.967% -9.0070% -6.1347%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  2 (2.00%) high mild
  5 (5.00%) high severe
sync/no-hook/core - host-to-wasm - unchecked - nop-params-and-results
                        time:   [58.828 ns 59.089 ns 59.418 ns]
                        change: [-15.596% -13.573% -11.484%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  3 (3.00%) high severe

sync/no-hook/core - wasm-to-host - typed - nop
                        time:   [6.6209 ns 6.6615 ns 6.7077 ns]
                        change: [-53.555% -52.878% -52.116%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  5 (5.00%) high mild
  1 (1.00%) high severe
sync/no-hook/core - wasm-to-host - typed - nop-params-and-results
                        time:   [7.9783 ns 8.0173 ns 8.0611 ns]
                        change: [-54.341% -53.947% -53.505%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
  3 (3.00%) high severe
sync/no-hook/core - wasm-to-host - untyped - nop
                        time:   [18.306 ns 18.393 ns 18.491 ns]
                        change: [-29.104% -28.127% -27.171%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  3 (3.00%) high mild
  4 (4.00%) high severe
sync/no-hook/core - wasm-to-host - untyped - nop-params-and-results
                        time:   [67.741 ns 68.120 ns 68.601 ns]
                        change: [-26.453% -25.061% -23.663%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 12 outliers among 100 measurements (12.00%)
  6 (6.00%) high mild
  6 (6.00%) high severe
sync/no-hook/core - wasm-to-host - unchecked - nop
                        time:   [6.8379 ns 6.8915 ns 6.9566 ns]
                        change: [-55.623% -55.062% -54.481%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  5 (5.00%) high mild
  2 (2.00%) high severe
sync/no-hook/core - wasm-to-host - unchecked - nop-params-and-results
                        time:   [27.856 ns 28.024 ns 28.214 ns]
                        change: [-17.320% -16.103% -15.038%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  6 (6.00%) high mild
  4 (4.00%) high severe

sync/no-hook/component - host-to-wasm - typed - nop
                        time:   [55.126 ns 55.506 ns 55.932 ns]
                        change: [-19.458% -18.098% -16.736%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
  2 (2.00%) high mild
  6 (6.00%) high severe
sync/no-hook/component - host-to-wasm - untyped - nop
                        time:   [101.42 ns 102.06 ns 102.82 ns]
                        change: [-15.679% -14.108% -12.523%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
  7 (7.00%) high mild
  2 (2.00%) high severe
sync/no-hook/component - host-to-wasm - typed - nop-params-and-results
                        time:   [61.482 ns 62.017 ns 62.591 ns]
                        change: [-16.576% -15.100% -13.595%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 10 outliers among 100 measurements (10.00%)
  9 (9.00%) high mild
  1 (1.00%) high severe
sync/no-hook/component - host-to-wasm - untyped - nop-params-and-results
                        time:   [223.50 ns 224.72 ns 226.05 ns]
                        change: [-21.732% -20.178% -18.679%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
  1 (1.00%) high mild
  3 (3.00%) high severe

sync/no-hook/component - wasm-to-host - typed - nop
                        time:   [39.115 ns 39.295 ns 39.500 ns]
                        change: [-15.139% -13.886% -12.721%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 8 outliers among 100 measurements (8.00%)
  1 (1.00%) low mild
  2 (2.00%) high mild
  5 (5.00%) high severe
sync/no-hook/component - wasm-to-host - typed - nop-params-and-results
                        time:   [47.234 ns 47.458 ns 47.745 ns]
                        change: [-13.833% -11.951% -9.8784%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
  3 (3.00%) high mild
  6 (6.00%) high severe
sync/no-hook/component - wasm-to-host - untyped - nop
                        time:   [52.311 ns 52.556 ns 52.817 ns]
                        change: [-12.736% -11.712% -10.693%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
  4 (4.00%) high mild
  5 (5.00%) high severe
sync/no-hook/component - wasm-to-host - untyped - nop-params-and-results
                        time:   [239.71 ns 241.59 ns 244.11 ns]
                        change: [-29.804% -28.173% -26.415%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
  4 (4.00%) high mild
  5 (5.00%) high severe
```

</details>
diff --git a/.gitignore b/.gitignore
@@ -30,3 +30,4 @@ tests/all/pulley_provenance_test.cwasm
 /artifacts
 testcase*.wat
 testcase*.wasm
+perf.data*
diff --git a/crates/wasmtime/src/runtime/func.rs b/crates/wasmtime/src/runtime/func.rs
@@ -943,6 +943,7 @@ impl Func {
     /// `StoreOpaque` while the `FuncType` is also being used (from the
     /// perspective of the borrow-checker) because otherwise the signature would
     /// consider `StoreOpaque` borrowed mutable while `FuncType` is in use.
+    #[inline]
     fn ty_ref<'a>(&self, store: &'a mut StoreOpaque) -> (&'a FuncType, &'a StoreOpaque) {
         // If we haven't loaded our type into the store yet then do so lazily at
         // this time.
@@ -1178,6 +1179,7 @@ impl Func {
     /// This must be called just before `call_impl_do_call`.
     ///
     /// Returns whether we need to GC before calling `call_impl_do_call`.
+    #[inline]
     fn call_impl_check_args<T>(
         &self,
         store: &mut StoreContextMut<'_, T>,
@@ -1234,6 +1236,7 @@ impl Func {
     /// You must have type checked the arguments by calling
     /// `call_impl_check_args` immediately before calling this function. It is
     /// only safe to call this function if that one did not return an error.
+    #[inline]
     unsafe fn call_impl_do_call<T>(
         &self,
         store: &mut StoreContextMut<'_, T>,
@@ -1737,6 +1740,7 @@ impl EntryStoreContext {
     /// function through this type's `Drop` implementation. This ensures that we
     /// even restore the values if we unwind the stack (e.g., because we are
     /// panicing out of a Wasm execution).
+    #[inline]
     fn exit_wasm(&mut self) {
         unsafe {
             if let Some(limit) = self.stack_limit {
@@ -1751,6 +1755,7 @@ impl EntryStoreContext {
 }
 
 impl Drop for EntryStoreContext {
+    #[inline]
     fn drop(&mut self) {
         self.exit_wasm();
     }
diff --git a/crates/wasmtime/src/runtime/func/typed.rs b/crates/wasmtime/src/runtime/func/typed.rs
@@ -93,6 +93,7 @@ where
     /// connected to an asynchronous store.
     ///
     /// [`Trap`]: crate::Trap
+    #[inline]
     pub fn call(&self, mut store: impl AsContextMut, params: Params) -> Result<Results> {
         let mut store = store.as_context_mut();
         assert!(
@@ -179,6 +180,7 @@ where
     ///
     /// If `Self::need_gc_before_call_raw`, then the caller must have done a GC
     /// just before calling this method.
+    #[inline]
     pub(crate) unsafe fn call_raw<T>(
         store: &mut StoreContextMut<'_, T>,
         ty: &FuncType,
diff --git a/crates/wasmtime/src/runtime/types.rs b/crates/wasmtime/src/runtime/types.rs
@@ -2384,6 +2384,7 @@ impl FuncType {
     }
 
     #[cfg(feature = "gc")]
+    #[inline]
     pub(crate) fn as_wasm_func_type(&self) -> &WasmFuncType {
         self.registered_type.unwrap_func()
     }
diff --git a/crates/wasmtime/src/runtime/vm/interpreter.rs b/crates/wasmtime/src/runtime/vm/interpreter.rs
@@ -82,6 +82,7 @@ impl InterpreterRef<'_> {
     /// The `bytecode` pointer should previously have been produced by Cranelift
     /// and `callee` / `caller` / `args_and_results` are normal array-call
     /// arguments being passed around.
+    #[inline(never)]
     pub unsafe fn call(
         mut self,
         mut bytecode: NonNull<u8>,
diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs
@@ -833,6 +833,7 @@ impl VMFuncRef {
     ///
     /// Note that the unsafety invariants to maintain here are not currently
     /// exhaustively documented.
+    #[inline]
     pub unsafe fn array_call(
         &self,
         pulley: Option<InterpreterRef<'_>>,
@@ -867,6 +868,7 @@ impl VMFuncRef {
         )
     }
 
+    #[inline]
     unsafe fn array_call_native(
         &self,
         caller: NonNull<VMOpaqueContext>,

Original file line number	Diff line number	Diff line change
`@@ -2384,6 +2384,7 @@ impl FuncType {`
`2384`	`2384`	`}`
`2385`	`2385`
`2386`	`2386`	`#[cfg(feature = "gc")]`
	`2387`	`+ #[inline]`
`2387`	`2388`	`pub(crate) fn as_wasm_func_type(&self) -> &WasmFuncType {`
`2388`	`2389`	`self.registered_type.unwrap_func()`
`2389`	`2390`	`}`
Original file line number	Diff line number	Diff line change
`@@ -833,6 +833,7 @@ impl VMFuncRef {`
`833`	`833`	`///`
`834`	`834`	`/// Note that the unsafety invariants to maintain here are not currently`
`835`	`835`	`/// exhaustively documented.`
	`836`	`+ #[inline]`
`836`	`837`	`pub unsafe fn array_call(`
`837`	`838`	`&self,`
`838`	`839`	`pulley: Option<InterpreterRef<'_>>,`
`@@ -867,6 +868,7 @@ impl VMFuncRef {`
`867`	`868`	`)`
`868`	`869`	`}`
`869`	`870`
	`871`	`+ #[inline]`
`870`	`872`	`unsafe fn array_call_native(`
`871`	`873`	`&self,`
`872`	`874`	`caller: NonNull<VMOpaqueContext>,`