Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4d9e261
chore: add placeholder tco feature
jonathanpwang Aug 19, 2025
13cc6f2
feat: add macro to generate tco handler and update interpreter for tco
jonathanpwang Aug 20, 2025
fb3d997
feat: rv32im tco without become keyword
jonathanpwang Aug 20, 2025
0c93530
fmt
jonathanpwang Aug 20, 2025
f7fd1d7
feat: tco for other extensions
jonathanpwang Aug 20, 2025
f01ccd2
chore: update feature deps
jonathanpwang Aug 20, 2025
5a193a0
fixes
jonathanpwang Aug 20, 2025
e6affb6
feat: simplify the handler type without Result
jonathanpwang Aug 20, 2025
9d9aa8a
feat: try become keyword again
jonathanpwang Aug 20, 2025
4c1de9d
chore: propagate tco feature
jonathanpwang Aug 20, 2025
76aa23d
feat: use custom macros to reduce code in ecc execution
jonathanpwang Aug 20, 2025
a82dfb2
refactor: use local dispatch! macros to reduce code duplication
jonathanpwang Aug 21, 2025
5c7b832
refactor: fp2 dispatch
jonathanpwang Aug 21, 2025
32c2dbd
chore: update feature comment
jonathanpwang Aug 20, 2025
eee6856
feat: metered handler for algebra extension
jonathanpwang Aug 21, 2025
c0d4d83
refactor: use local dispatch! macros to reduce code duplication
jonathanpwang Aug 21, 2025
bae6afe
feat: run! macro for tco on pure+metered execution
jonathanpwang Aug 21, 2025
6e2b0a2
chore: fmt
jonathanpwang Aug 21, 2025
714c97a
fix: missing handler for is_eq
jonathanpwang Aug 21, 2025
8d3d06e
feat: bigint metered handler
jonathanpwang Aug 21, 2025
312f6d5
refactor: use dispatch! for rv32 executors
jonathanpwang Aug 21, 2025
9a2df6e
cleanup: turn off tco feature
jonathanpwang Aug 21, 2025
09e05c9
ci: switch benchmarks to use tco
jonathanpwang Aug 21, 2025
fae5453
fix: proc-macro also needs tco feature
jonathanpwang Aug 21, 2025
a4566a7
chore: lint
jonathanpwang Aug 21, 2025
ce6981c
ci: benchmarks with tco feature
jonathanpwang Aug 21, 2025
6759a94
feat: use macro's "tco" feature
jonathanpwang Aug 21, 2025
68b12f5
chore: add instrumentation for VmState::initial
jonathanpwang Aug 21, 2025
d8ab8a6
Merge branch 'main' into feat/tco
jonathanpwang Aug 21, 2025
d265fe9
chore: cargo shear
jonathanpwang Aug 21, 2025
64c0571
fix: ci
jonathanpwang Aug 21, 2025
ce7c037
perf: remove pc_base from pc_idx calc
jonathanpwang Aug 22, 2025
63b013a
fix: remove unused error
jonathanpwang Aug 22, 2025
5f5138c
chore: don't keep pre_compute_insns when tco
jonathanpwang Aug 22, 2025
f46bf70
chore: remove unused derive
jonathanpwang Aug 22, 2025
8a51077
chore: dispatch! for poseidon2
jonathanpwang Aug 22, 2025
aee66b1
chore: phantom lifetime
jonathanpwang Aug 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/benchmark-call.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ on:
env:
S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
S3_FLAMEGRAPHS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/flamegraphs
FEATURE_FLAGS: "metrics,parallel,nightly-features"
FEATURE_FLAGS: "metrics,parallel,nightly-features,tco"
INPUT_ARGS: ""
CARGO_NET_GIT_FETCH_WITH_CLI: "true"

Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/benchmarks-execute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ env:
CARGO_TERM_COLOR: always
S3_FIXTURES_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/fixtures
JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true"
TOOLCHAIN: "+nightly-2025-08-19"

jobs:
codspeed-walltime-benchmarks:
Expand Down Expand Up @@ -65,12 +66,12 @@ jobs:

- name: Build benchmarks
working-directory: benchmarks/execute
run: cargo codspeed build --profile maxperf
run: cargo $TOOLCHAIN codspeed build --profile maxperf
- name: Run benchmarks
uses: CodSpeedHQ/action@v3
with:
working-directory: benchmarks/execute
run: cargo codspeed run
run: cargo $TOOLCHAIN codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}

codspeed-instrumentation-benchmarks:
Expand Down Expand Up @@ -110,10 +111,10 @@ jobs:

- name: Build benchmarks
working-directory: benchmarks/execute
run: cargo codspeed build
run: cargo $TOOLCHAIN codspeed build
- name: Run benchmarks
uses: CodSpeedHQ/action@v3
with:
working-directory: benchmarks/execute
run: cargo codspeed run
run: cargo $TOOLCHAIN codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ dashmap = "6.1.0"
memmap2 = "0.9.5"
libc = "0.2.175"
tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] }
paste = "1.0.15"

# default-features = false for no_std for use in guest programs
itertools = { version = "0.14.0", default-features = false }
Expand Down
1 change: 1 addition & 0 deletions benchmarks/execute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ divan = { package = "codspeed-divan-compat", version = "3.0.2" }

[features]
default = ["jemalloc"]
tco = ["openvm-sdk/tco"]
mimalloc = ["openvm-circuit/mimalloc"]
jemalloc = ["openvm-circuit/jemalloc"]
jemalloc-prof = ["openvm-circuit/jemalloc-prof"]
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/prove/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ metrics.workspace = true
[dev-dependencies]

[features]
default = ["parallel", "jemalloc", "metrics", "evm"]
default = ["parallel", "jemalloc", "metrics"]
metrics = ["openvm-sdk/metrics"]
tco = ["openvm-sdk/tco"]
perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
stark-debug = ["openvm-sdk/stark-debug"]
# runs leaf aggregation benchmarks:
Expand Down
5 changes: 4 additions & 1 deletion ci/scripts/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ def run_cargo_command(
kzg_params_dir,
profile="release"
):
toolchain = "+1.86"
if "tco" in feature_flags:
toolchain = "+nightly-2025-08-19"
# Command to run (for best performance but slower builds, use --profile maxperf)
command = [
"cargo", "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--"
"cargo", toolchain, "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--"
]

if app_log_blowup is not None:
Expand Down
1 change: 1 addition & 0 deletions crates/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ default = ["parallel", "jemalloc", "evm-verify", "metrics"]
evm-prove = ["openvm-sdk/evm-prove"]
evm-verify = ["evm-prove", "openvm-sdk/evm-verify"]
metrics = ["openvm-sdk/metrics"]
tco = ["openvm-sdk/tco"]
# for guest profiling:
perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
# performance features:
Expand Down
3 changes: 3 additions & 0 deletions crates/cli/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#![cfg_attr(feature = "tco", allow(incomplete_features))]
#![cfg_attr(feature = "tco", feature(explicit_tail_calls))]

pub mod commands;
pub mod default;
pub mod input;
Expand Down
11 changes: 11 additions & 0 deletions crates/sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ metrics = [
"openvm-native-recursion/metrics",
"openvm-native-compiler/metrics",
]
tco = [
"openvm-circuit/tco",
"openvm-rv32im-circuit/tco",
"openvm-native-circuit/tco",
"openvm-sha256-circuit/tco",
"openvm-keccak256-circuit/tco",
"openvm-bigint-circuit/tco",
"openvm-algebra-circuit/tco",
"openvm-ecc-circuit/tco",
"openvm-pairing-circuit/tco"
]
# for guest profiling:
perf-metrics = ["openvm-circuit/perf-metrics", "openvm-transpiler/function-span"]
# turns on stark-backend debugger in all proofs
Expand Down
2 changes: 2 additions & 0 deletions crates/sdk/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![cfg_attr(feature = "tco", allow(incomplete_features))]
#![cfg_attr(feature = "tco", feature(explicit_tail_calls))]
use std::{
borrow::Borrow,
fs::read,
Expand Down
3 changes: 3 additions & 0 deletions crates/vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ basic-memory = []
# turns on stark-backend debugger in all proofs
stark-debug = []
test-utils = ["openvm-stark-sdk"]
# Tail call optimization. This requires a nightly toolchain for the `become` keyword (https://github.com/rust-lang/rust/pull/144232).
# Explicit tail calls are still an incomplete feature in Rust, so the `tco` feature remains experimental until they are stabilized.
tco = ["openvm-circuit-derive/tco"]
# performance features:
mimalloc = ["openvm-stark-backend/mimalloc"]
jemalloc = ["openvm-stark-backend/jemalloc"]
Expand Down
5 changes: 4 additions & 1 deletion crates/vm/derive/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ license.workspace = true
proc-macro = true

[dependencies]
syn = { version = "2.0", features = ["parsing"] }
syn = { version = "2.0", features = ["parsing", "full"] }
quote = "1.0"
proc-macro2 = "1.0"
itertools = { workspace = true }

[features]
tco = []
112 changes: 107 additions & 5 deletions crates/vm/derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use syn::{
GenericParam, Ident, Meta, Token,
};

#[cfg(feature = "tco")]
mod tco;

#[proc_macro_derive(PreflightExecutor)]
pub fn preflight_executor_derive(input: TokenStream) -> TokenStream {
let ast: syn::DeriveInput = syn::parse(input).unwrap();
Expand Down Expand Up @@ -172,6 +175,18 @@ pub fn executor_derive(input: TokenStream) -> TokenStream {
Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, {
self.0.pre_compute(pc, inst, data)
}

#[cfg(feature = "tco")]
fn handler<Ctx>(
&self,
pc: u32,
inst: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
data: &mut [u8],
) -> Result<::openvm_circuit::arch::Handler<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
where
Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, {
self.0.handler(pc, inst, data)
}
}
}
.into()
Expand Down Expand Up @@ -205,18 +220,21 @@ pub fn executor_derive(input: TokenStream) -> TokenStream {
});
// Use full path ::openvm_circuit... so it can be used either within or outside the vm
// crate. Assume F is already generic of the field.
let (pre_compute_size_arms, pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
let (pre_compute_size_arms, pre_compute_arms, handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
let field_ty = &field.ty;
let pre_compute_size_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute_size(x)
};
let pre_compute_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute(x, pc, instruction, data)
};
let handler_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::handler(x, pc, instruction, data)
};
let where_predicate = syn::parse_quote! {
#field_ty: ::openvm_circuit::arch::Executor<#first_ty_generic>
};
(pre_compute_size_arm, pre_compute_arm, where_predicate)
(pre_compute_size_arm, pre_compute_arm, handler_arm, where_predicate)
}));
let where_clause = new_generics.make_where_clause();
for predicate in where_predicates {
Expand Down Expand Up @@ -247,6 +265,20 @@ pub fn executor_derive(input: TokenStream) -> TokenStream {
#(#pre_compute_arms,)*
}
}

#[cfg(feature = "tco")]
fn handler<Ctx>(
&self,
pc: u32,
instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
data: &mut [u8],
) -> Result<::openvm_circuit::arch::Handler<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
where
Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, {
match self {
#(#handler_arms,)*
}
}
}
}
.into()
Expand Down Expand Up @@ -300,6 +332,18 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream {
Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, {
self.0.metered_pre_compute(chip_idx, pc, inst, data)
}
#[cfg(feature = "tco")]
fn metered_handler<Ctx>(
&self,
chip_idx: usize,
pc: u32,
inst: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
data: &mut [u8],
) -> Result<::openvm_circuit::arch::Handler<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
where
Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, {
self.0.metered_handler(chip_idx, pc, inst, data)
}
}
}
.into()
Expand Down Expand Up @@ -333,18 +377,21 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream {
});
// Use full path ::openvm_circuit... so it can be used either within or outside the vm
// crate. Assume F is already generic of the field.
let (pre_compute_size_arms, metered_pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
let (pre_compute_size_arms, metered_pre_compute_arms, metered_handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
let field_ty = &field.ty;
let pre_compute_size_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute_size(x)
};
let metered_pre_compute_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute(x, chip_idx, pc, instruction, data)
};
let metered_handler_arm = quote! {
#name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_handler(x, chip_idx, pc, instruction, data)
};
let where_predicate = syn::parse_quote! {
#field_ty: ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>
};
(pre_compute_size_arm, metered_pre_compute_arm, where_predicate)
(pre_compute_size_arm, metered_pre_compute_arm, metered_handler_arm, where_predicate)
}));
let where_clause = new_generics.make_where_clause();
for predicate in where_predicates {
Expand Down Expand Up @@ -376,6 +423,21 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream {
#(#metered_pre_compute_arms,)*
}
}

#[cfg(feature = "tco")]
fn metered_handler<Ctx>(
&self,
chip_idx: usize,
pc: u32,
instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
data: &mut [u8],
) -> Result<::openvm_circuit::arch::Handler<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
where
Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, {
match self {
#(#metered_handler_arms,)*
}
}
}
}
.into()
Expand Down Expand Up @@ -501,7 +563,6 @@ fn generate_config_traits_impl(name: &Ident, inner: &DataStruct) -> syn::Result<
.iter()
.filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("config")))
.exactly_one()
.clone()
.expect("Exactly one field must have the #[config] attribute");
let (source_name, source_name_upper) =
gen_name_with_uppercase_idents(source_field.ident.as_ref().unwrap());
Expand Down Expand Up @@ -700,3 +761,44 @@ fn parse_executor_type(
})
}
}

/// An attribute procedural macro for creating TCO (Tail Call Optimization) handlers.
///
/// This macro generates a handler function that wraps an execute implementation
/// with tail call optimization using the `become` keyword. It extracts the generics
/// and where clauses from the original function.
///
/// The expansion is only performed when this crate's `tco` feature is enabled, in which
/// case the annotated item is handed to `tco::tco_impl`. Without the `tco` feature the
/// attribute is a no-op and the annotated item is emitted unchanged.
///
/// # Usage
///
/// Place this attribute above a function definition:
/// ```ignore
/// #[create_tco_handler]
/// unsafe fn execute_e1_impl<F: PrimeField32, CTX, const B_IS_IMM: bool>(
///     pre_compute: &[u8],
///     state: &mut VmExecState<F, GuestMemory, CTX>,
/// ) where
///     CTX: ExecutionCtxTrait,
/// {
///     // function body
/// }
/// ```
///
/// This will generate a TCO handler function with the same generics and where clauses.
///
/// # Safety
///
/// Do not use this macro if your function needs to terminate execution successfully
/// (i.e. without an error) with a specific exit code. The handler generated by this macro
/// assumes that execution should continue unless the execute_impl returns an error. This
/// is done for performance to skip an exit code check.
#[proc_macro_attribute]
pub fn create_tco_handler(_attr: TokenStream, item: TokenStream) -> TokenStream {
    // Exactly one of the two cfg-gated blocks survives compilation and becomes the
    // function's tail expression; the `tco` module only exists when the feature is on.
    #[cfg(feature = "tco")]
    {
        tco::tco_impl(item)
    }
    #[cfg(not(feature = "tco"))]
    {
        item
    }
}
Loading
Loading