Skip to content

Commit aad0172

Browse files
feat(nightly): execution becomes faster (#2013)
Uses the nightly `become` keyword to tell LLVM to emit `musttail` calls. This did not work at first, when the `Handler` type returned `Result`, but it does now that the handler returns nothing. Ideas taken from https://github.com/xacrimon/tcvm/blob/main/src/interp.rs

To remove code duplication, I created local declarative `dispatch!` macros for the "enum dispatch" logic over function pointers that we were doing. The benefit is that this logic is now shared across four functions: {e1,e2} x {pre_compute,handler}. I did not make a single general-purpose macro because Rust's token parsing rules make this hard: a macro cannot reference idents it does not declare, and the idents used vary from function to function.

Removed `pc_base`; instead we leave some empty space so that the `pc - pc_base` calculation is avoided. This is because `self.pc_base` is a runtime variable, so we don't want to have to use a register (or, worse, a load/store) to access it.

To complete this:
- [x] For each crate, add a "tco" feature, and then add the `#[create_tco_handler]` attribute above the `execute_e1_impl` functions. Then, for each `Executor` implementation, copy the `pre_compute` function implementation verbatim, but switch to the `handler` function signature and return the tco handler fn pointer instead.
- [x] Do the same for metered execution.
- [x] ~~Switch to x86 global asm instead of relying on LLVM if we want to be extra safe.~~ This seemed complicated and hard to do fully properly, so I prefer `become`.

Closes INT-4309
1 parent 4852493 commit aad0172

File tree

75 files changed

+2576
-1033
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+2576
-1033
lines changed

.github/workflows/benchmark-call.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ on:
107107
env:
108108
S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
109109
S3_FLAMEGRAPHS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/flamegraphs
110-
FEATURE_FLAGS: "metrics,parallel,nightly-features"
110+
FEATURE_FLAGS: "metrics,parallel,nightly-features,tco"
111111
INPUT_ARGS: ""
112112
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
113113

.github/workflows/benchmarks-execute.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ name: "Execution benchmarks"
22

33
on:
44
push:
5-
# TODO(ayush): remove after feat/new-execution is merged
6-
branches: ["main", "feat/new-execution"]
5+
branches: ["main"]
76
pull_request:
87
types: [opened, synchronize, reopened, labeled]
98
branches: ["**"]
@@ -28,6 +27,7 @@ env:
2827
CARGO_TERM_COLOR: always
2928
S3_FIXTURES_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/fixtures
3029
JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true"
30+
TOOLCHAIN: "+nightly-2025-08-19"
3131

3232
jobs:
3333
codspeed-walltime-benchmarks:
@@ -66,12 +66,12 @@ jobs:
6666

6767
- name: Build benchmarks
6868
working-directory: benchmarks/execute
69-
run: cargo codspeed build --profile maxperf
69+
run: cargo $TOOLCHAIN codspeed build --profile maxperf --features tco
7070
- name: Run benchmarks
7171
uses: CodSpeedHQ/action@v3
7272
with:
7373
working-directory: benchmarks/execute
74-
run: cargo codspeed run
74+
run: cargo $TOOLCHAIN codspeed run
7575
token: ${{ secrets.CODSPEED_TOKEN }}
7676

7777
codspeed-instrumentation-benchmarks:
@@ -111,10 +111,10 @@ jobs:
111111

112112
- name: Build benchmarks
113113
working-directory: benchmarks/execute
114-
run: cargo codspeed build
114+
run: cargo $TOOLCHAIN codspeed build --features tco
115115
- name: Run benchmarks
116116
uses: CodSpeedHQ/action@v3
117117
with:
118118
working-directory: benchmarks/execute
119-
run: cargo codspeed run
119+
run: cargo $TOOLCHAIN codspeed run
120120
token: ${{ secrets.CODSPEED_TOKEN }}

benchmarks/execute/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ divan = { package = "codspeed-divan-compat", version = "3.0.2" }
4646

4747
[features]
4848
default = ["jemalloc"]
49+
tco = ["openvm-sdk/tco"]
4950
mimalloc = ["openvm-circuit/mimalloc"]
5051
jemalloc = ["openvm-circuit/jemalloc"]
5152
jemalloc-prof = ["openvm-circuit/jemalloc-prof"]

benchmarks/prove/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,9 @@ metrics.workspace = true
3333
[dev-dependencies]
3434

3535
[features]
36-
default = ["parallel", "jemalloc", "metrics", "evm"]
36+
default = ["parallel", "jemalloc", "metrics"]
3737
metrics = ["openvm-sdk/metrics"]
38+
tco = ["openvm-sdk/tco"]
3839
perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
3940
stark-debug = ["openvm-sdk/stark-debug"]
4041
# runs leaf aggregation benchmarks:

ci/scripts/bench.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ def run_cargo_command(
1515
kzg_params_dir,
1616
profile="release"
1717
):
18+
toolchain = "+1.86"
19+
if "tco" in feature_flags:
20+
toolchain = "+nightly-2025-08-19"
1821
# Command to run (for best performance but slower builds, use --profile maxperf)
1922
command = [
20-
"cargo", "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--"
23+
"cargo", toolchain, "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--"
2124
]
2225

2326
if app_log_blowup is not None:

crates/cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ default = ["parallel", "jemalloc", "evm-verify", "metrics"]
4545
evm-prove = ["openvm-sdk/evm-prove"]
4646
evm-verify = ["evm-prove", "openvm-sdk/evm-verify"]
4747
metrics = ["openvm-sdk/metrics"]
48+
tco = ["openvm-sdk/tco"]
4849
# for guest profiling:
4950
perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
5051
# performance features:

crates/cli/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#![cfg_attr(feature = "tco", allow(incomplete_features))]
2+
#![cfg_attr(feature = "tco", feature(explicit_tail_calls))]
3+
14
pub mod commands;
25
pub mod default;
36
pub mod input;

crates/sdk/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@ metrics = [
7979
"openvm-native-recursion/metrics",
8080
"openvm-native-compiler/metrics",
8181
]
82+
tco = [
83+
"openvm-circuit/tco",
84+
"openvm-rv32im-circuit/tco",
85+
"openvm-native-circuit/tco",
86+
"openvm-sha256-circuit/tco",
87+
"openvm-keccak256-circuit/tco",
88+
"openvm-bigint-circuit/tco",
89+
"openvm-algebra-circuit/tco",
90+
"openvm-ecc-circuit/tco",
91+
"openvm-pairing-circuit/tco"
92+
]
8293
# for guest profiling:
8394
perf-metrics = ["openvm-circuit/perf-metrics", "openvm-transpiler/function-span"]
8495
# turns on stark-backend debugger in all proofs

crates/sdk/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#![cfg_attr(feature = "tco", allow(incomplete_features))]
2+
#![cfg_attr(feature = "tco", feature(explicit_tail_calls))]
13
use std::{
24
borrow::Borrow,
35
fs::read,

crates/vm/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ basic-memory = []
6868
# turns on stark-backend debugger in all proofs
6969
stark-debug = []
7070
test-utils = ["openvm-stark-sdk"]
71+
# Tail call optimizations. This requires nightly for the `become` keyword (https://github.com/rust-lang/rust/pull/144232).
72+
# However tail call elimination is still an incomplete feature in Rust, so the `tco` feature remains experimental until then.
73+
tco = ["openvm-circuit-derive/tco"]
7174
# performance features:
7275
mimalloc = ["openvm-stark-backend/mimalloc"]
7376
jemalloc = ["openvm-stark-backend/jemalloc"]

0 commit comments

Comments
 (0)