diff --git a/crates/fflonk/Cargo.toml b/crates/fflonk/Cargo.toml
index 46638b1..3236024 100644
--- a/crates/fflonk/Cargo.toml
+++ b/crates/fflonk/Cargo.toml
@@ -12,7 +12,7 @@ description = "CUDA implementation of the fflonk prover"
 exclude = ["/data"]
 
 [dependencies]
-fflonk-cpu = {workspace = true}
+fflonk-cpu = {workspace = true, optional = true}
 circuit_definitions.workspace = true
 gpu-ffi.workspace = true
 rand = "0.4"
@@ -24,5 +24,6 @@ serde_json = "1"
 serde_derive = "1"
 
 [features]
-default = []
+default = ["fflonk-cpu"]
 sanity = []
+allocator = ["fflonk-cpu/allocator"]
diff --git a/crates/fflonk/src/context.rs b/crates/fflonk/src/context.rs
index 38b14a1..fec31e1 100644
--- a/crates/fflonk/src/context.rs
+++ b/crates/fflonk/src/context.rs
@@ -55,7 +55,7 @@ pub(crate) fn init_tmp_mempool() {
     unsafe {
         _TMP_MEMPOOL = Some(bc_mem_pool::new(DEFAULT_DEVICE_ID).unwrap());
     }
-    let num_tmp_bytes = 1_100_000_000;
+    let num_tmp_bytes = 3 << 29; // 1.5 GiB
     let stream = bc_stream::new().unwrap();
     DVec::::allocate_on(num_tmp_bytes, _tmp_mempool(), stream);
 }
@@ -146,9 +146,27 @@ const POWERS_OF_COSET_OMEGA_COARSE_LOG_COUNT: u32 = 14;
 pub type DeviceContextWithSingleDevice = DeviceContext<1>;
 
 impl DeviceContext {
+    /// Initializes the device context like [`Self::init`], but takes the MSM
+    /// bases from the caller-provided `crs` instead of building the compact
+    /// CRS internally.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `crs` holds fewer than
+    /// `MAX_COMBINED_DEGREE_FACTOR * domain_size` G1 bases.
+    pub fn init_from_preloaded_crs(
+        domain_size: usize,
+        crs: Crs,
+    ) -> CudaResult {
+        let context = Self::init_no_msm(domain_size)?;
+        Self::init_msm_on_static_memory(domain_size, Some(crs))?;
+
+        Ok(context)
+    }
+
     pub fn init(domain_size: usize) -> CudaResult {
         let context = Self::init_no_msm(domain_size)?;
-        Self::init_msm_on_static_memory(domain_size)?;
+        Self::init_msm_on_static_memory(domain_size, None)?;
         // Self::init_msm_on_pool(domain_size)?;
 
         Ok(context)
@@ -179,8 +197,11 @@ impl DeviceContext {
         Ok(DeviceContext)
     }
 
-    fn init_msm_on_static_memory(domain_size: usize) -> CudaResult<()> {
-        Self::inner_init_msm(domain_size, None, None)?;
+    fn init_msm_on_static_memory(
+        domain_size: usize,
+        crs: Option>,
+    ) -> CudaResult<()> {
+        Self::inner_init_msm(domain_size, crs, None, None)?;
         Ok(())
     }
 
@@ -188,13 +209,14 @@ impl DeviceContext {
     unsafe fn init_msm_on_pool(domain_size: usize) -> CudaResult<()> {
         let pool = _msm_bases_mempool();
         let stream = bc_stream::new().unwrap();
-        Self::inner_init_msm(domain_size, Some(pool), Some(stream))?;
+        Self::inner_init_msm(domain_size, None, Some(pool), Some(stream))?;
         stream.sync().unwrap();
         Ok(())
     }
 
     fn inner_init_msm(
         domain_size: usize,
+        crs: Option>,
         pool: Option,
         stream: Option,
     ) -> CudaResult<()> {
@@ -205,7 +227,8 @@ impl DeviceContext {
         init_msm_result_mempool();
         // MSM impl requires bases to be located in a buffer that is
         // multiple of the domain_size
-        let crs = init_compact_crs(&bellman::worker::Worker::new(), domain_size);
+        let crs =
+            crs.unwrap_or_else(|| init_compact_crs(&bellman::worker::Worker::new(), domain_size));
         let num_bases = MAX_COMBINED_DEGREE_FACTOR * domain_size;
         assert!(crs.g1_bases.len() >= num_bases);
         let bases = match (pool, stream) {
diff --git a/crates/fflonk/src/convenience.rs b/crates/fflonk/src/convenience.rs
index 4e6bc91..c05b304 100644
--- a/crates/fflonk/src/convenience.rs
+++ b/crates/fflonk/src/convenience.rs
@@ -3,7 +3,7 @@ use bellman::{
     kate_commitment::{Crs, CrsForMonomialForm},
     plonk::{
         better_better_cs::cs::{
-            Circuit, SynthesisModeGenerateSetup, SynthesisModeProve, SynthesisModeTesting,
+            Circuit, SynthesisModeProve, SynthesisModeTesting,
         },
         commitments::transcript::keccak_transcript::RollingKeccakTranscript,
     },
@@ -14,8 +14,8 @@ use circuit_definitions::circuit_definitions::aux_layer::{
 };
 use fflonk::{FflonkAssembly, L1_VERIFIER_DOMAIN_SIZE_LOG};
 
-pub type FflonkSnarkVerifierCircuitDeviceSetup =
-    FflonkDeviceSetup;
+pub type FflonkSnarkVerifierCircuitDeviceSetup =
+    FflonkDeviceSetup;
 
 use super::*;
 
@@ -190,7 +190,7 @@ pub fn precompute_and_save_setup_and_vk_for_fflonk_snark_circuit(
     println!("Generating fflonk setup data on the device");
 
     let device_setup =
-        FflonkSnarkVerifierCircuitDeviceSetup::create_setup_on_device(&circuit).unwrap();
+        FflonkSnarkVerifierCircuitDeviceSetup::::create_setup_on_device(&circuit).unwrap();
     let setup_file_path = format!("{}/final_snark_device_setup.bin", path);
     println!("Saving setup into file {setup_file_path}");
     let device_setup_file = std::fs::File::create(&setup_file_path).unwrap();
diff --git a/crates/fflonk/src/lib.rs b/crates/fflonk/src/lib.rs
index 02256f5..5ebd7a3 100644
--- a/crates/fflonk/src/lib.rs
+++ b/crates/fflonk/src/lib.rs
@@ -63,7 +63,7 @@ pub use gpu_ffi;
 use gpu_ffi::{bc_event, bc_mem_pool, bc_stream};
 use std::alloc::Allocator;
 
-pub use context::{DeviceContext, DeviceContextWithSingleDevice};
+pub use context::{init_compact_crs, DeviceContext, DeviceContextWithSingleDevice};
 pub use fflonk::MAX_COMBINED_DEGREE_FACTOR;
 
 pub use convenience::FflonkSnarkVerifierCircuitDeviceSetup;
@@ -74,3 +74,5 @@ pub type FflonkSnarkVerifierCircuit = ZkSyncSnarkWrapperCircuitNoLookupCustomGat
 pub type FflonkSnarkVerifierCircuitVK = FflonkVerificationKey;
 pub type FflonkSnarkVerifierCircuitProof = FflonkProof;
 pub type FflonkSnarkVerifierCircuitSetup = FflonkSetup;
+
+pub use allocator::{GlobalHost, HostAllocator};
diff --git a/crates/fflonk/src/setup.rs b/crates/fflonk/src/setup.rs
index bb8e3d3..839d779 100644
--- a/crates/fflonk/src/setup.rs
+++ b/crates/fflonk/src/setup.rs
@@ -254,12 +254,7 @@ impl, A: HostAllocator> FflonkDeviceSetup {
         assert_eq!(num_polys, 5);
         let mut main_gate_selector_monomials = vec![];
         for _ in 0..num_polys {
-            let num_values = reader.read_u64::()?;
-            let mut coeffs = Vec::with_capacity_in(num_values as usize, A::default());
-            for _ in 0..num_values {
-                let el = read_fr(&mut reader)?;
-                coeffs.push(el);
-            }
+            let coeffs = read_raw_fr_vec::<_, _, A>(&mut reader)?;
             main_gate_selector_monomials.push(coeffs);
         }
 
@@ -267,12 +262,19 @@ impl, A: HostAllocator> FflonkDeviceSetup {
         assert_eq!(num_polys, 3);
         let mut variable_indexes = vec![];
         for _ in 0..num_polys {
-            let num_values = reader.read_u64::()?;
-            let mut indexes = Vec::with_capacity_in(num_values as usize, A::default());
-            for _ in 0..num_values {
-                let el = reader.read_u32::()?;
-                indexes.push(el);
-            }
+            let num_values = reader.read_u64::()? as usize;
+            let mut indexes = Vec::with_capacity_in(num_values, A::default());
+            // SAFETY: the byte view covers exactly the `num_values` reserved
+            // slots and is zeroed before a `&mut [u8]` is formed over it; the
+            // length is set only after `read_exact` filled every index.
+            unsafe {
+                let spare = &mut indexes.spare_capacity_mut()[..num_values];
+                let num_bytes = std::mem::size_of_val(spare);
+                let bytes = spare.as_mut_ptr() as *mut u8;
+                std::ptr::write_bytes(bytes, 0, num_bytes);
+                reader.read_exact(std::slice::from_raw_parts_mut(bytes, num_bytes))?;
+                indexes.set_len(num_values);
+            }
             variable_indexes.push(indexes);
         }
 
@@ -293,17 +295,80 @@ impl, A: HostAllocator> FflonkDeviceSetup {
         use byteorder::{BigEndian, WriteBytesExt};
         writer.write_u64::(self.main_gate_selector_monomials.len() as u64)?;
         for mon in self.main_gate_selector_monomials.iter() {
-            write_fr_vec(&mon, &mut writer)?;
+            write_raw_fr_slice(mon, &mut writer)?;
         }
         writer.write_u64::(self.variable_indexes.len() as u64)?;
         for col in self.variable_indexes.iter() {
             writer.write_u64::(col.len() as u64)?;
-            for el in col {
-                writer.write_u32::(*el)?;
-            }
+            // Elements are dumped in native byte order (the previous format
+            // wrote each one big-endian).
+            // SAFETY: `col` is a live, initialized vector of integers, so its
+            // storage can be viewed as `size_of_val` read-only bytes.
+            let buf = unsafe {
+                std::slice::from_raw_parts(
+                    col.as_ptr() as *const u8,
+                    std::mem::size_of_val(col.as_slice()),
+                )
+            };
+            writer.write_all(buf)?;
         }
         write_curve_affine(&self.c0_commitment, &mut writer)?;
         write_curve_affine(&self.g2_elems[0], &mut writer)?;
         write_curve_affine(&self.g2_elems[1], writer)
     }
 }
+
+/// Reads a vector stored by [`write_raw_fr_slice`]: a big-endian `u32`
+/// element count followed by the raw in-memory bytes of the elements.
+///
+/// The payload is copied without per-element validation, so `src` must be
+/// trusted and must have been written with the same element layout.
+pub fn read_raw_fr_vec(
+    mut src: R,
+) -> std::io::Result> {
+    use byteorder::{BigEndian, ReadBytesExt};
+    let num_values = src.read_u32::()? as usize;
+    let mut values = Vec::with_capacity_in(num_values, A::default());
+    // SAFETY: the byte view covers exactly the `num_values` reserved slots and
+    // is zeroed before a `&mut [u8]` is formed over it; the length is set only
+    // after `read_exact` filled every slot. Assumes any bit pattern is a valid
+    // element (TODO confirm for the field type used).
+    unsafe {
+        let spare = &mut values.spare_capacity_mut()[..num_values];
+        let num_bytes = std::mem::size_of_val(spare);
+        let bytes = spare.as_mut_ptr() as *mut u8;
+        std::ptr::write_bytes(bytes, 0, num_bytes);
+        src.read_exact(std::slice::from_raw_parts_mut(bytes, num_bytes))?;
+        values.set_len(num_values);
+    }
+
+    Ok(values)
+}
+
+/// Writes `src_values` as a big-endian `u32` element count followed by the
+/// raw in-memory bytes of the elements; the inverse of [`read_raw_fr_vec`].
+///
+/// # Panics
+///
+/// Panics if `src_values` holds `u32::MAX` or more elements.
+pub fn write_raw_fr_slice(
+    src_values: &[F],
+    mut dst: W,
+) -> std::io::Result<()> {
+    use byteorder::{BigEndian, WriteBytesExt};
+    let num_values = src_values.len();
+    assert!(num_values < u32::MAX as usize);
+    dst.write_u32::(num_values as u32)?;
+    // SAFETY: `src_values` is only borrowed shared, so its storage is exposed
+    // as read-only bytes of exactly `size_of_val` length. Assumes the element
+    // type has no padding bytes (TODO confirm).
+    let buf = unsafe {
+        std::slice::from_raw_parts(
+            src_values.as_ptr() as *const u8,
+            std::mem::size_of_val(src_values),
+        )
+    };
+    dst.write_all(buf)?;
+
+    Ok(())
+}
diff --git a/crates/gpu-prover/Cargo.toml b/crates/gpu-prover/Cargo.toml
index 3bb0eab..291c64d 100644
--- a/crates/gpu-prover/Cargo.toml
+++ b/crates/gpu-prover/Cargo.toml
@@ -11,7 +11,7 @@ name = "zksync-gpu-prover"
 description = "ZKsync GPU prover utilities"
 
 [dependencies]
-franklin-crypto = { workspace = true, optional = true }
+franklin-crypto = { workspace = true, default-features = true, optional = true }
 gpu-ffi.workspace = true
 
 crossbeam = "0.8"
@@ -23,6 +23,5 @@ bit-vec = "0.6"
 serde = {version = "1", features = ["derive", "rc"]}
 
 [features]
-default = ["allocator"]
-no_allocator = ["franklin-crypto"]
+default = ["franklin-crypto"]
 allocator = ["franklin-crypto/allocator"]