From a45ae749bd2ca584cfdafdcc608bbb142ccbc796 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 00:16:01 -0700 Subject: [PATCH 01/11] feat(sdk): async app prover --- Cargo.lock | 1 + Cargo.toml | 1 + benchmarks/prove/Cargo.toml | 12 +- benchmarks/prove/src/bin/async_regex.rs | 59 ++----- crates/sdk/Cargo.toml | 2 + crates/sdk/src/prover/app.rs | 211 +++++++++++++++++++++++- crates/vm/src/arch/interpreter.rs | 5 +- 7 files changed, 233 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e23479adf9..dbbfa83917 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6164,6 +6164,7 @@ dependencies = [ "snark-verifier-sdk", "tempfile", "thiserror 1.0.69", + "tokio", "toml 0.8.23", "tracing", ] diff --git a/Cargo.toml b/Cargo.toml index 83cd3cbf14..847352924d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -233,6 +233,7 @@ dashmap = "6.1.0" memmap2 = "0.9.5" libc = "0.2.175" tracing-subscriber = { version = "0.3.20", features = ["std", "env-filter"] } +tokio = "1.47.1" # default-features = false for no_std for use in guest programs itertools = { version = "0.14.0", default-features = false } diff --git a/benchmarks/prove/Cargo.toml b/benchmarks/prove/Cargo.toml index df50329478..dd9b75b79e 100644 --- a/benchmarks/prove/Cargo.toml +++ b/benchmarks/prove/Cargo.toml @@ -19,9 +19,9 @@ openvm-native-circuit.workspace = true openvm-native-compiler.workspace = true openvm-native-recursion = { workspace = true, features = ["test-utils"] } -clap = { version = "4.5.9", features = ["derive", "env"] } +clap = { workspace = true, features = ["derive", "env"] } eyre.workspace = true -tokio = { version = "1.43.1", features = ["rt", "rt-multi-thread", "macros", "time"] } +tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros"] } rand_chacha = { version = "0.3", default-features = false } k256 = { workspace = true, features = ["ecdsa"] } tiny-keccak.workspace = true @@ -33,11 +33,12 @@ metrics.workspace = true [dev-dependencies] [features] -default = ["parallel", "jemalloc", "metrics"] +default = ["parallel", "jemalloc", "metrics", "async"] metrics = ["openvm-sdk/metrics"] tco = ["openvm-sdk/tco"] perf-metrics = ["openvm-sdk/perf-metrics", "metrics"] stark-debug = ["openvm-sdk/stark-debug"] +async = ["openvm-sdk/async"] # runs leaf aggregation benchmarks: aggregation = [] evm = ["openvm-sdk/evm-verify"] @@ -63,3 +64,8 @@ path = "src/bin/fib_e2e.rs" [[bin]] name = "kitchen_sink" path = "src/bin/kitchen_sink.rs" + +[[bin]] +name = "async_regex" +path = "src/bin/async_regex.rs" +required-features = ["async"] diff --git a/benchmarks/prove/src/bin/async_regex.rs b/benchmarks/prove/src/bin/async_regex.rs index 4633fd5ec9..65dcfe85df 100644 --- a/benchmarks/prove/src/bin/async_regex.rs +++ b/benchmarks/prove/src/bin/async_regex.rs @@ -1,22 +1,14 @@ -use std::{ - env::var, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, - }, - time::Duration, -}; +use std::env::var; use clap::Parser; use openvm_benchmarks_prove::util::BenchmarkCli; use openvm_benchmarks_utils::get_programs_dir; use openvm_sdk::{ config::{SdkVmBuilder, SdkVmConfig}, - prover::AppProver, + prover::AsyncAppProver, DefaultStarkEngine, Sdk, StdIn, F, }; use openvm_stark_sdk::config::setup_tracing; -use tokio::{spawn, task::spawn_blocking, time::sleep}; #[tokio::main] async fn main() -> eyre::Result<()> { @@ -47,46 +39,15 @@ async fn main() -> eyre::Result<()> { let (app_pk, _app_vk) = sdk.app_keygen(); let max_par_jobs: usize = var("MAX_PAR_JOBS").map(|m| m.parse()).unwrap_or(Ok(1))?; - let num_jobs: usize = var("NUM_JOBS").map(|m| m.parse()).unwrap_or(Ok(20))?; - let cur_num_jobs = Arc::new(AtomicUsize::new(0)); - let mut tasks = Vec::with_capacity(num_jobs); - for idx in 0..num_jobs { - let cur_num_jobs = cur_num_jobs.clone(); - let app_exe = app_exe.clone(); - let app_pk = app_pk.clone(); - let input = input.clone(); - let task = spawn(async move { - loop { - let c = cur_num_jobs.fetch_add(1, Ordering::SeqCst); - if c < max_par_jobs { - tracing::info!("Acquired job {}, cur num jobs {}", idx, c + 1); - break; - } - cur_num_jobs.fetch_sub(1, Ordering::SeqCst); - sleep(Duration::from_millis(100)).await; - } - let res = spawn_blocking(move || -> eyre::Result<()> { - let mut prover = AppProver::::new( - SdkVmBuilder, - &app_pk.app_vm_pk, - app_exe, - app_pk.leaf_verifier_program_commit(), - )?; - let _proof = prover.prove(input)?; - Ok(()) - }) - .await?; - let prev_num = cur_num_jobs.fetch_sub(1, Ordering::SeqCst); - tracing::info!("Decrement cur_num_jobs {} to {}", prev_num, prev_num - 1); - res - }); - tasks.push(task); - sleep(Duration::from_millis(1000)).await; - } - for task in tasks { - task.await??; - } + let prover = AsyncAppProver::::new( + SdkVmBuilder, + app_pk.app_vm_pk.clone(), + app_exe, + app_pk.leaf_verifier_program_commit(), + max_par_jobs, + )?; + let _proof = prover.prove(input).await?; Ok(()) } diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml index 44197f1eab..60b2a89769 100644 --- a/crates/sdk/Cargo.toml +++ b/crates/sdk/Cargo.toml @@ -61,6 +61,7 @@ forge-fmt = { workspace = true, optional = true } rrs-lib.workspace = true num-bigint.workspace = true cfg-if.workspace = true +tokio = { workspace = true, features = ["rt", "sync"], optional = true } [features] default = ["parallel", "jemalloc"] @@ -101,6 +102,7 @@ perf-metrics = [ # turns on stark-backend debugger in all proofs stark-debug = ["openvm-circuit/stark-debug"] test-utils = ["openvm-circuit/test-utils"] +async = ["tokio"] # performance features: # (rayon is always imported because of halo2, so "parallel" feature is redundant) parallel = ["openvm-circuit/parallel"] diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index c85cf413a2..16a81e5fb7 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -1,5 +1,7 @@ use std::sync::{Arc, OnceLock}; +#[cfg(feature = "async")] +pub use async_prover::*; use getset::Getters; use itertools::Itertools; use openvm_circuit::{ @@ -130,10 +132,7 @@ where + MeteredExecutor> + PreflightExecutor, VB::RecordArena>, { - assert!( - self.vm_config().as_ref().continuation_enabled, - "Use generate_app_proof_without_continuations instead." - ); + assert!(self.vm_config().as_ref().continuation_enabled); check_max_constraint_degrees( self.vm_config().as_ref(), &self.instance.vm.engine.fri_params(), @@ -238,3 +237,207 @@ pub fn verify_app_proof( user_public_values, }) } + +#[cfg(feature = "async")] +mod async_prover { + use derivative::Derivative; + use openvm_circuit::system::memory::merkle::public_values::UserPublicValuesProof; + use openvm_stark_sdk::config::FriParameters; + use tokio::{spawn, sync::Semaphore, task::spawn_blocking}; + + use super::*; + + /// Thread-safe asynchronous app prover. + #[derive(Derivative, Getters)] + #[derivative(Clone)] + pub struct AsyncAppProver + where + E: StarkEngine, + VB: VmBuilder, + { + pub program_name: Option, + #[getset(get = "pub")] + vm_builder: VB, + #[getset(get = "pub")] + app_vm_pk: Arc>, + app_exe: Arc>>, + #[getset(get = "pub")] + leaf_verifier_program_commit: Com, + + semaphore: Arc, + } + + impl AsyncAppProver + where + E: StarkFriEngine + 'static, + VB: VmBuilder + Clone + Send + Sync + 'static, + VB::VmConfig: Send + Sync, + >>::Executor: Executor> + + MeteredExecutor> + + PreflightExecutor, VB::RecordArena>, + Val: PrimeField32, + Com: + AsRef<[Val; CHUNK]> + From<[Val; CHUNK]> + Into<[Val; CHUNK]>, + { + pub fn new( + vm_builder: VB, + app_vm_pk: Arc>, + app_exe: Arc>>, + leaf_verifier_program_commit: Com, + max_concurrency: usize, + ) -> Result { + Ok(Self { + program_name: None, + vm_builder, + app_vm_pk, + app_exe, + leaf_verifier_program_commit, + semaphore: Arc::new(Semaphore::new(max_concurrency)), + }) + } + + pub fn set_program_name(&mut self, program_name: impl AsRef) -> &mut Self { + self.program_name = Some(program_name.as_ref().to_string()); + self + } + pub fn with_program_name(mut self, program_name: impl AsRef) -> Self { + self.set_program_name(program_name); + self + } + + /// App Exe + pub fn exe(&self) -> Arc>> { + self.app_exe.clone() + } + + /// App VM config + pub fn vm_config(&self) -> &VB::VmConfig { + &self.app_vm_pk.vm_config + } + + pub fn fri_params(&self) -> FriParameters { + self.app_vm_pk.fri_params + } + + /// Creates an [AppProver] within a particular thread. The former instance is not + /// thread-safe and should **not** be moved between threads. + pub fn local(&self) -> Result, VirtualMachineError> { + AppProver::new( + self.vm_builder.clone(), + &self.app_vm_pk, + self.app_exe.clone(), + self.leaf_verifier_program_commit.clone(), + ) + } + + pub async fn prove( + self, + input: StdIn>, + ) -> eyre::Result> { + assert!(self.vm_config().as_ref().continuation_enabled); + check_max_constraint_degrees(self.vm_config().as_ref(), &self.fri_params()); + #[cfg(feature = "metrics")] + metrics::counter!("fri.log_blowup").absolute(self.fri_params().log_blowup as u64); + + info_span!( + "app proof", + group = self + .program_name + .as_ref() + .unwrap_or(&"app_proof".to_string()) + ) + .in_scope(async || { + // PERF[jpw]: it is possible to create metered_interpreter without creating vm. The + // latter is more convenient, but does unnecessary setup (e.g., transfer pk to + // device). Also, app_commit should be cached. + let mut local_prover = self.local()?; + let app_commit = local_prover.app_commit(); + local_prover.instance.reset_state(input.clone()); + let mut state = local_prover.instance.state_mut().take().unwrap(); + let vm = &mut local_prover.instance.vm; + let metered_ctx = vm.build_metered_ctx(&self.app_exe); + let metered_interpreter = vm.metered_interpreter(&self.app_exe)?; + let (segments, _) = metered_interpreter.execute_metered(input, metered_ctx)?; + drop(metered_interpreter); + let pure_interpreter = vm.interpreter(&self.app_exe)?; + let mut tasks = Vec::with_capacity(segments.len()); + for (seg_idx, segment) in segments.into_iter().enumerate() { + tracing::info!( + "Re-executing from instret {} to instret_start {} for segment {seg_idx}", + state.instret(), + segment.instret_start + ); + let num_insns = segment.instret_start.checked_sub(state.instret()).unwrap(); + state = pure_interpreter.execute_from_state(state, Some(num_insns))?; + + let semaphore = self.semaphore.clone(); + let async_worker = self.clone(); + let start_state = state.clone(); + let task = spawn(async move { + let _permit = semaphore.acquire().await?; + spawn_blocking(move || { + info_span!("prove_segment", segment = seg_idx).in_scope( + || -> eyre::Result<_> { + // We need a separate span so the metric label includes + // "segment" + // from _segment_span + let _prove_span = info_span!("vm_prove").entered(); + let mut worker = async_worker.local()?; + let instance = &mut worker.instance; + let vm = &mut instance.vm; + let preflight_interpreter = &mut instance.interpreter; + let (segment_proof, _) = vm.prove( + preflight_interpreter, + start_state, + Some(segment.num_insns), + &segment.trace_heights, + )?; + Ok(segment_proof) + }, + ) + }) + .await? + }); + tasks.push(task); + } + let final_memory = &state.memory.memory; + let user_public_values = UserPublicValuesProof::compute( + vm.config().as_ref().memory_config.memory_dimensions(), + vm.config().as_ref().num_public_values, + &vm_poseidon2_hasher(), + final_memory, + ); + + let mut proofs = Vec::with_capacity(tasks.len()); + for task in tasks { + let proof = task.await??; + proofs.push(proof); + } + let cont_proof = ContinuationVmProof { + per_segment: proofs, + user_public_values, + }; + + // We skip verification of the user public values proof here because it is directly + // computed from the merkle tree above + let engine = E::new(self.fri_params()); + let res = verify_segments( + &engine, + &self.app_vm_pk.vm_pk.get_vk(), + &cont_proof.per_segment, + )?; + let app_exe_commit_u32s = app_commit.app_exe_commit.to_u32_digest(); + let exe_commit_u32s = res.exe_commit.map(|x| x.as_canonical_u32()); + if exe_commit_u32s != app_exe_commit_u32s { + return Err(VmVerificationError::ExeCommitMismatch { + expected: app_exe_commit_u32s, + actual: exe_commit_u32s, + } + .into()); + } + Ok(cont_proof) + }) + .await + } + } +} diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 03acbf245c..e9fe32a47b 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -353,10 +353,11 @@ where from_state: VmState, num_insns: Option, ) -> Result, ExecutionError> { - let ctx = ExecutionCtx::new(num_insns); + let instret = from_state.instret(); + let instret_end = num_insns.map(|n| instret.checked_add(n).unwrap()); + let ctx = ExecutionCtx::new(instret_end); let mut exec_state = VmExecState::new(from_state, ctx); - let instret = exec_state.instret(); let pc = exec_state.pc(); let instret_end = exec_state.ctx.instret_end; run!( From f44e9a50e93c42993f43183c6be54a118ae71388 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 00:31:05 -0700 Subject: [PATCH 02/11] chore: return err on overflow --- crates/vm/src/arch/execution.rs | 2 ++ crates/vm/src/arch/interpreter.rs | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 4431c2f1e3..b6c8270585 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -66,6 +66,8 @@ pub enum ExecutionError { FailedWithExitCode(u32), #[error("trace buffer out of bounds: requested {requested} but capacity is {capacity}")] TraceBufferOutOfBounds { requested: usize, capacity: usize }, + #[error("instruction counter overflow: {instret} + {num_insns} > u64::MAX")] + InstretOverflow { instret: u64, num_insns: u64 }, #[error("inventory error: {0}")] Inventory(#[from] ExecutorInventoryError), #[error("static program error: {0}")] diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index e9fe32a47b..4fafe7a438 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -354,7 +354,17 @@ where num_insns: Option, ) -> Result, ExecutionError> { let instret = from_state.instret(); - let instret_end = num_insns.map(|n| instret.checked_add(n).unwrap()); + let instret_end = if let Some(n) = num_insns { + let end = instret + .checked_add(n) + .ok_or(ExecutionError::InstretOverflow { + instret, + num_insns: n, + })?; + Some(end) + } else { + None + }; let ctx = ExecutionCtx::new(instret_end); let mut exec_state = VmExecState::new(from_state, ctx); From 0c2c7afcf25fcdd22c0a3dc5e3c95131e38b94e1 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 10:45:37 -0700 Subject: [PATCH 03/11] chore: loosen tokio versioning --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 847352924d..735ea361f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -233,7 +233,7 @@ dashmap = "6.1.0" memmap2 = "0.9.5" libc = "0.2.175" tracing-subscriber = { version = "0.3.20", features = ["std", "env-filter"] } -tokio = "1.47.1" +tokio = "1" # >=1.0.0 to allow downstream flexibility # default-features = false for no_std for use in guest programs itertools = { version = "0.14.0", default-features = false } From c9980852381241d89a40d5aae493bc6f22418425 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:36:56 -0700 Subject: [PATCH 04/11] debug --- crates/sdk/src/prover/app.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 16a81e5fb7..6e518c22f1 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -363,12 +363,15 @@ mod async_prover { let mut tasks = Vec::with_capacity(segments.len()); for (seg_idx, segment) in segments.into_iter().enumerate() { tracing::info!( - "Re-executing from instret {} to instret_start {} for segment {seg_idx}", - state.instret(), - segment.instret_start + %seg_idx, + instret = state.instret(), + %segment.instret_start, + pc = state.pc(), + "Re-executing", ); let num_insns = segment.instret_start.checked_sub(state.instret()).unwrap(); state = pure_interpreter.execute_from_state(state, Some(num_insns))?; + tracing::info!(segment_start_pc = state.pc()); let semaphore = self.semaphore.clone(); let async_worker = self.clone(); From f3d2da1d93c756ffa1a9f452df5ce0a68baa297c Mon Sep 17 00:00:00 2001 From: Ayush Shukla Date: Tue, 7 Oct 2025 22:08:19 +0200 Subject: [PATCH 05/11] fix: handle tco case for 0 num_insns --- crates/vm/src/arch/interpreter.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 4fafe7a438..52b1980424 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -104,6 +104,12 @@ macro_rules! run { #[cfg(feature = "tco")] { tracing::debug!("execute_tco"); + + if $ctx::should_suspend($instret, $pc, $arg, &mut $exec_state) { + $exec_state.set_instret_and_pc($instret, $pc); + return Ok(()); + } + let handler = $interpreter .get_handler($pc) .ok_or(ExecutionError::PcOutOfBounds($pc))?; From d62b0d9db9b7f187e7d0cf96d10f960c72c5138a Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:26:36 -0700 Subject: [PATCH 06/11] chore: switch stark-backend branch --- Cargo.lock | 12 ++++++------ Cargo.toml | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbbfa83917..837940ad52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5459,7 +5459,7 @@ dependencies = [ [[package]] name = "openvm-cuda-backend" version = "1.2.1-rc.4" -source = "git+https://github.com/openvm-org/stark-backend.git?branch=fix%2Fasync-errors#f9e8d331ee0e9a8ba613835a17edab45d23ed289" +source = "git+https://github.com/openvm-org/stark-backend.git?branch=main#501ac3125638fc3f5a044a9be5cc5220b5b575e2" dependencies = [ "bincode 2.0.1", "bincode_derive", @@ -5491,7 +5491,7 @@ dependencies = [ [[package]] name = "openvm-cuda-builder" version = "1.2.1-rc.4" -source = "git+https://github.com/openvm-org/stark-backend.git?branch=fix%2Fasync-errors#f9e8d331ee0e9a8ba613835a17edab45d23ed289" +source = "git+https://github.com/openvm-org/stark-backend.git?branch=main#501ac3125638fc3f5a044a9be5cc5220b5b575e2" dependencies = [ "cc", "glob", @@ -5500,7 +5500,7 @@ dependencies = [ [[package]] name = "openvm-cuda-common" version = "1.2.1-rc.4" -source = "git+https://github.com/openvm-org/stark-backend.git?branch=fix%2Fasync-errors#f9e8d331ee0e9a8ba613835a17edab45d23ed289" +source = "git+https://github.com/openvm-org/stark-backend.git?branch=main#501ac3125638fc3f5a044a9be5cc5220b5b575e2" dependencies = [ "bytesize", "ctor", @@ -6247,7 +6247,7 @@ dependencies = [ [[package]] name = "openvm-stark-backend" version = "1.2.1-rc.4" -source = "git+https://github.com/openvm-org/stark-backend.git?branch=fix%2Fasync-errors#f9e8d331ee0e9a8ba613835a17edab45d23ed289" +source = "git+https://github.com/openvm-org/stark-backend.git?branch=main#501ac3125638fc3f5a044a9be5cc5220b5b575e2" dependencies = [ "bitcode", "cfg-if", @@ -6277,11 +6277,11 @@ dependencies = [ [[package]] name = "openvm-stark-sdk" version = "1.2.1-rc.4" -source = "git+https://github.com/openvm-org/stark-backend.git?branch=fix%2Fasync-errors#f9e8d331ee0e9a8ba613835a17edab45d23ed289" +source = "git+https://github.com/openvm-org/stark-backend.git?branch=main#501ac3125638fc3f5a044a9be5cc5220b5b575e2" dependencies = [ "dashmap", "derivative", - "derive_more 0.99.20", + "derive_more 1.0.0", "ff 0.13.1", "itertools 0.14.0", "metrics", diff --git a/Cargo.toml b/Cargo.toml index 735ea361f7..f7a943b185 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -113,11 +113,11 @@ lto = "thin" [workspace.dependencies] # Stark Backend -openvm-stark-backend = { git = "https://github.com/openvm-org/stark-backend.git", branch="fix/async-errors", default-features = false } -openvm-stark-sdk = { git = "https://github.com/openvm-org/stark-backend.git", branch="fix/async-errors", default-features = false } -openvm-cuda-backend = { git = "https://github.com/openvm-org/stark-backend.git", branch="fix/async-errors", default-features = false } -openvm-cuda-builder = { git = "https://github.com/openvm-org/stark-backend.git", branch="fix/async-errors", default-features = false } -openvm-cuda-common = { git = "https://github.com/openvm-org/stark-backend.git", branch="fix/async-errors", default-features = false } +openvm-stark-backend = { git = "https://github.com/openvm-org/stark-backend.git", branch="main", default-features = false } +openvm-stark-sdk = { git = "https://github.com/openvm-org/stark-backend.git", branch="main", default-features = false } +openvm-cuda-backend = { git = "https://github.com/openvm-org/stark-backend.git", branch="main", default-features = false } +openvm-cuda-builder = { git = "https://github.com/openvm-org/stark-backend.git", branch="main", default-features = false } +openvm-cuda-common = { git = "https://github.com/openvm-org/stark-backend.git", branch="main", default-features = false } # OpenVM openvm-sdk = { path = "crates/sdk", default-features = false } From fba35ed00bd413a1ad8edfd6b777e85011e93dee Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:01:55 -0700 Subject: [PATCH 07/11] fix: pure execute last segment --- crates/sdk/src/prover/app.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 6e518c22f1..94922e2d78 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -241,7 +241,9 @@ pub fn verify_app_proof( #[cfg(feature = "async")] mod async_prover { use derivative::Derivative; - use openvm_circuit::system::memory::merkle::public_values::UserPublicValuesProof; + use openvm_circuit::{ + arch::ExecutionError, system::memory::merkle::public_values::UserPublicValuesProof, + }; use openvm_stark_sdk::config::FriParameters; use tokio::{spawn, sync::Semaphore, task::spawn_blocking}; @@ -361,6 +363,10 @@ mod async_prover { drop(metered_interpreter); let pure_interpreter = vm.interpreter(&self.app_exe)?; let mut tasks = Vec::with_capacity(segments.len()); + let terminal_instret = segments + .last() + .map(|s| s.instret_start + s.num_insns) + .unwrap_or(u64::MAX); for (seg_idx, segment) in segments.into_iter().enumerate() { tracing::info!( %seg_idx, @@ -403,6 +409,12 @@ mod async_prover { }); tasks.push(task); } + // Finish execution to termination + state = pure_interpreter.execute_from_state(state, None)?; + if state.instret() != terminal_instret { + // This should never happen + return eyre::eyre!("Pure and metered execution inconsistency"); + } let final_memory = &state.memory.memory; let user_public_values = UserPublicValuesProof::compute( vm.config().as_ref().memory_config.memory_dimensions(), From 9dc634a4fe7178a26c78b51db136c2dd425b5869 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 7 Oct 2025 14:31:46 -0700 Subject: [PATCH 08/11] fix: error type --- crates/sdk/src/prover/app.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 94922e2d78..65b209e020 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -377,7 +377,6 @@ mod async_prover { ); let num_insns = segment.instret_start.checked_sub(state.instret()).unwrap(); state = pure_interpreter.execute_from_state(state, Some(num_insns))?; - tracing::info!(segment_start_pc = state.pc()); let semaphore = self.semaphore.clone(); let async_worker = self.clone(); @@ -412,8 +411,13 @@ mod async_prover { // Finish execution to termination state = pure_interpreter.execute_from_state(state, None)?; if state.instret() != terminal_instret { + tracing::warn!( + "Pure execution terminal instret={}, metered execution terminal instret={}", + state.instret(), + terminal_instret + ); // This should never happen - return eyre::eyre!("Pure and metered execution inconsistency"); + return Err(ExecutionError::DidNotTerminate.into()); } let final_memory = &state.memory.memory; let user_public_values = UserPublicValuesProof::compute( From 8803574e79a4096f7ea3205e1383c7fc4e5817ef Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 9 Oct 2025 21:49:49 -0400 Subject: [PATCH 09/11] fix: tracing --- crates/sdk/src/prover/app.rs | 224 +++++++++++++++++------------------ 1 file changed, 110 insertions(+), 114 deletions(-) diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 65b209e020..26507aa467 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -246,6 +246,7 @@ mod async_prover { }; use openvm_stark_sdk::config::FriParameters; use tokio::{spawn, sync::Semaphore, task::spawn_blocking}; + use tracing::instrument; use super::*; @@ -332,6 +333,11 @@ mod async_prover { ) } + #[instrument( + name = "app proof", + skip_all, + group = self.program_name.as_ref().unwrap_or(&"app_proof".to_string()) + )] pub async fn prove( self, input: StdIn>, @@ -341,122 +347,112 @@ mod async_prover { #[cfg(feature = "metrics")] metrics::counter!("fri.log_blowup").absolute(self.fri_params().log_blowup as u64); - info_span!( - "app proof", - group = self - .program_name - .as_ref() - .unwrap_or(&"app_proof".to_string()) - ) - .in_scope(async || { - // PERF[jpw]: it is possible to create metered_interpreter without creating vm. The - // latter is more convenient, but does unnecessary setup (e.g., transfer pk to - // device). Also, app_commit should be cached. - let mut local_prover = self.local()?; - let app_commit = local_prover.app_commit(); - local_prover.instance.reset_state(input.clone()); - let mut state = local_prover.instance.state_mut().take().unwrap(); - let vm = &mut local_prover.instance.vm; - let metered_ctx = vm.build_metered_ctx(&self.app_exe); - let metered_interpreter = vm.metered_interpreter(&self.app_exe)?; - let (segments, _) = metered_interpreter.execute_metered(input, metered_ctx)?; - drop(metered_interpreter); - let pure_interpreter = vm.interpreter(&self.app_exe)?; - let mut tasks = Vec::with_capacity(segments.len()); - let terminal_instret = segments - .last() - .map(|s| s.instret_start + s.num_insns) - .unwrap_or(u64::MAX); - for (seg_idx, segment) in segments.into_iter().enumerate() { - tracing::info!( - %seg_idx, - instret = state.instret(), - %segment.instret_start, - pc = state.pc(), - "Re-executing", - ); - let num_insns = segment.instret_start.checked_sub(state.instret()).unwrap(); - state = pure_interpreter.execute_from_state(state, Some(num_insns))?; - - let semaphore = self.semaphore.clone(); - let async_worker = self.clone(); - let start_state = state.clone(); - let task = spawn(async move { - let _permit = semaphore.acquire().await?; - spawn_blocking(move || { - info_span!("prove_segment", segment = seg_idx).in_scope( - || -> eyre::Result<_> { - // We need a separate span so the metric label includes - // "segment" - // from _segment_span - let _prove_span = info_span!("vm_prove").entered(); - let mut worker = async_worker.local()?; - let instance = &mut worker.instance; - let vm = &mut instance.vm; - let preflight_interpreter = &mut instance.interpreter; - let (segment_proof, _) = vm.prove( - preflight_interpreter, - start_state, - Some(segment.num_insns), - &segment.trace_heights, - )?; - Ok(segment_proof) - }, - ) - }) - .await? - }); - tasks.push(task); - } - // Finish execution to termination - state = pure_interpreter.execute_from_state(state, None)?; - if state.instret() != terminal_instret { - tracing::warn!( - "Pure execution terminal instret={}, metered execution terminal instret={}", - state.instret(), - terminal_instret - ); - // This should never happen - return Err(ExecutionError::DidNotTerminate.into()); - } - let final_memory = &state.memory.memory; - let user_public_values = UserPublicValuesProof::compute( - vm.config().as_ref().memory_config.memory_dimensions(), - vm.config().as_ref().num_public_values, - &vm_poseidon2_hasher(), - final_memory, + // PERF[jpw]: it is possible to create metered_interpreter without creating vm. The + // latter is more convenient, but does unnecessary setup (e.g., transfer pk to + // device). Also, app_commit should be cached. + let mut local_prover = self.local()?; + let app_commit = local_prover.app_commit(); + local_prover.instance.reset_state(input.clone()); + let mut state = local_prover.instance.state_mut().take().unwrap(); + let vm = &mut local_prover.instance.vm; + let metered_ctx = vm.build_metered_ctx(&self.app_exe); + let metered_interpreter = vm.metered_interpreter(&self.app_exe)?; + let (segments, _) = metered_interpreter.execute_metered(input, metered_ctx)?; + drop(metered_interpreter); + let pure_interpreter = vm.interpreter(&self.app_exe)?; + let mut tasks = Vec::with_capacity(segments.len()); + let terminal_instret = segments + .last() + .map(|s| s.instret_start + s.num_insns) + .unwrap_or(u64::MAX); + for (seg_idx, segment) in segments.into_iter().enumerate() { + tracing::info!( + %seg_idx, + instret = state.instret(), + %segment.instret_start, + pc = state.pc(), + "Re-executing", ); - - let mut proofs = Vec::with_capacity(tasks.len()); - for task in tasks { - let proof = task.await??; - proofs.push(proof); - } - let cont_proof = ContinuationVmProof { - per_segment: proofs, - user_public_values, - }; - - // We skip verification of the user public values proof here because it is directly - // computed from the merkle tree above - let engine = E::new(self.fri_params()); - let res = verify_segments( - &engine, - &self.app_vm_pk.vm_pk.get_vk(), - &cont_proof.per_segment, - )?; - let app_exe_commit_u32s = app_commit.app_exe_commit.to_u32_digest(); - let exe_commit_u32s = res.exe_commit.map(|x| x.as_canonical_u32()); - if exe_commit_u32s != app_exe_commit_u32s { - return Err(VmVerificationError::ExeCommitMismatch { - expected: app_exe_commit_u32s, - actual: exe_commit_u32s, - } - .into()); + let num_insns = segment.instret_start.checked_sub(state.instret()).unwrap(); + state = pure_interpreter.execute_from_state(state, Some(num_insns))?; + + let semaphore = self.semaphore.clone(); + let async_worker = self.clone(); + let start_state = state.clone(); + let task = spawn(async move { + let _permit = semaphore.acquire().await?; + spawn_blocking(move || { + info_span!("prove_segment", segment = seg_idx).in_scope( + || -> eyre::Result<_> { + // We need a separate span so the metric label includes + // "segment" + // from _segment_span + let _prove_span = info_span!("vm_prove").entered(); + let mut worker = async_worker.local()?; + let instance = &mut worker.instance; + let vm = &mut instance.vm; + let preflight_interpreter = &mut instance.interpreter; + let (segment_proof, _) = vm.prove( + preflight_interpreter, + start_state, + Some(segment.num_insns), + &segment.trace_heights, + )?; + Ok(segment_proof) + }, + ) + }) + .await? + }); + tasks.push(task); + } + // Finish execution to termination + state = pure_interpreter.execute_from_state(state, None)?; + if state.instret() != terminal_instret { + tracing::warn!( + "Pure execution terminal instret={}, metered execution terminal instret={}", + state.instret(), + terminal_instret + ); + // This should never happen + return Err(ExecutionError::DidNotTerminate.into()); + } + let final_memory = &state.memory.memory; + let user_public_values = UserPublicValuesProof::compute( + vm.config().as_ref().memory_config.memory_dimensions(), + vm.config().as_ref().num_public_values, + &vm_poseidon2_hasher(), + final_memory, + ); + + let mut proofs = Vec::with_capacity(tasks.len()); + for task in tasks { + let proof = task.await??; + proofs.push(proof); + } + let cont_proof = ContinuationVmProof { + per_segment: proofs, + user_public_values, + }; + + // We skip verification of the user public values proof here because it is directly + // computed from the merkle tree above + let engine = E::new(self.fri_params()); + let res = verify_segments( + &engine, + &self.app_vm_pk.vm_pk.get_vk(), + &cont_proof.per_segment, + )?; + let app_exe_commit_u32s = app_commit.app_exe_commit.to_u32_digest(); + let exe_commit_u32s = res.exe_commit.map(|x| x.as_canonical_u32()); + if exe_commit_u32s != app_exe_commit_u32s { + return Err(VmVerificationError::ExeCommitMismatch { + expected: app_exe_commit_u32s, + actual: exe_commit_u32s, } - Ok(cont_proof) - }) - .await + .into()); + } + Ok(cont_proof) } } } From beeabf92bb19259111d3865f53022e0ca1893e73 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 9 Oct 2025 22:15:46 -0400 Subject: [PATCH 10/11] fix instrument fields --- crates/sdk/src/prover/app.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 26507aa467..80b6ea1417 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -336,7 +336,9 @@ mod async_prover { #[instrument( name = "app proof", skip_all, - group = self.program_name.as_ref().unwrap_or(&"app_proof".to_string()) + fields( + group = self.program_name.as_ref().unwrap_or(&"app_proof".to_string()) + ) )] pub async fn prove( self, From 9049f9cedc835d15b73d0401807305f332d562a3 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 9 Oct 2025 22:56:02 -0400 Subject: [PATCH 11/11] fix: async span handling --- crates/sdk/Cargo.toml | 2 +- crates/sdk/src/prover/app.rs | 57 ++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml index 60b2a89769..e9e13bbffb 100644 --- a/crates/sdk/Cargo.toml +++ b/crates/sdk/Cargo.toml @@ -64,7 +64,7 @@ cfg-if.workspace = true tokio = { workspace = true, features = ["rt", "sync"], optional = true } [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "async"] evm-prove = [ "openvm-continuations/static-verifier", "openvm-native-recursion/evm-prove", diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs index 80b6ea1417..5d1b640393 100644 --- a/crates/sdk/src/prover/app.rs +++ b/crates/sdk/src/prover/app.rs @@ -246,7 +246,7 @@ mod async_prover { }; use openvm_stark_sdk::config::FriParameters; use tokio::{spawn, sync::Semaphore, task::spawn_blocking}; - use tracing::instrument; + use tracing::{instrument, Instrument}; use super::*; @@ -381,31 +381,36 @@ mod async_prover { let semaphore = self.semaphore.clone(); let async_worker = self.clone(); let start_state = state.clone(); - let task = spawn(async move { - let _permit = semaphore.acquire().await?; - spawn_blocking(move || { - info_span!("prove_segment", segment = seg_idx).in_scope( - || -> eyre::Result<_> { - // We need a separate span so the metric label includes - // "segment" - // from _segment_span - let _prove_span = info_span!("vm_prove").entered(); - let mut worker = async_worker.local()?; - let instance = &mut worker.instance; - let vm = &mut instance.vm; - let preflight_interpreter = &mut instance.interpreter; - let (segment_proof, _) = vm.prove( - preflight_interpreter, - start_state, - Some(segment.num_insns), - &segment.trace_heights, - )?; - Ok(segment_proof) - }, - ) - }) - .await? - }); + let task = spawn( + async move { + let _permit = semaphore.acquire().await?; + let span = tracing::Span::current(); + spawn_blocking(move || { + let _span = span.enter(); + info_span!("prove_segment", segment = seg_idx).in_scope( + || -> eyre::Result<_> { + // We need a separate span so the metric label includes + // "segment" + // from _segment_span + let _prove_span = info_span!("vm_prove").entered(); + let mut worker = async_worker.local()?; + let instance = &mut worker.instance; + let vm = &mut instance.vm; + let preflight_interpreter = &mut instance.interpreter; + let (segment_proof, _) = vm.prove( + preflight_interpreter, + start_state, + Some(segment.num_insns), + &segment.trace_heights, + )?; + Ok(segment_proof) + }, + ) + }) + .await? + } + .in_current_span(), + ); tasks.push(task); } // Finish execution to termination