Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions crates/fflonk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description = "CUDA implementation of the fflonk prover"
exclude = ["/data"]

[dependencies]
fflonk-cpu = {workspace = true}
fflonk-cpu = {workspace = true, optional = true}
circuit_definitions.workspace = true
gpu-ffi.workspace = true
rand = "0.4"
Expand All @@ -24,5 +24,6 @@ serde_json = "1"
serde_derive = "1"

[features]
default = []
default = ["fflonk-cpu"]
sanity = []
allocator = ["fflonk-cpu/allocator"]
29 changes: 23 additions & 6 deletions crates/fflonk/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub(crate) fn init_tmp_mempool() {
unsafe {
_TMP_MEMPOOL = Some(bc_mem_pool::new(DEFAULT_DEVICE_ID).unwrap());
}
let num_tmp_bytes = 1_100_000_000;
let num_tmp_bytes = 3 << 29; //1.5GB
let stream = bc_stream::new().unwrap();
DVec::<u8, PoolAllocator>::allocate_on(num_tmp_bytes, _tmp_mempool(), stream);
}
Expand Down Expand Up @@ -146,9 +146,19 @@ const POWERS_OF_COSET_OMEGA_COARSE_LOG_COUNT: u32 = 14;
pub type DeviceContextWithSingleDevice = DeviceContext<1>;

impl<const N: usize> DeviceContext<N> {
/// Creates a device context using a CRS that the caller has already loaded.
///
/// Runs `init_no_msm` for `domain_size` first, then hands the supplied
/// `crs` to `init_msm_on_static_memory`. An error from either step is
/// returned to the caller unchanged.
pub fn init_from_preloaded_crs(
domain_size: usize,
crs: Crs<CompactBn256, CrsForMonomialForm>,
) -> CudaResult<Self> {
let preloaded = Some(crs);
let ctx = Self::init_no_msm(domain_size)?;
Self::init_msm_on_static_memory(domain_size, preloaded)?;

Ok(ctx)
}

pub fn init(domain_size: usize) -> CudaResult<Self> {
let context = Self::init_no_msm(domain_size)?;
Self::init_msm_on_static_memory(domain_size)?;
Self::init_msm_on_static_memory(domain_size, None)?;
// Self::init_msm_on_pool(domain_size)?;

Ok(context)
Expand Down Expand Up @@ -179,22 +189,26 @@ impl<const N: usize> DeviceContext<N> {
Ok(DeviceContext)
}

fn init_msm_on_static_memory(domain_size: usize) -> CudaResult<()> {
Self::inner_init_msm(domain_size, None, None)?;
fn init_msm_on_static_memory(
domain_size: usize,
crs: Option<Crs<CompactBn256, CrsForMonomialForm>>,
) -> CudaResult<()> {
Self::inner_init_msm(domain_size, crs, None, None)?;
Ok(())
}

// In reality we keep bases on a statically allocated buffer.
unsafe fn init_msm_on_pool(domain_size: usize) -> CudaResult<()> {
let pool = _msm_bases_mempool();
let stream = bc_stream::new().unwrap();
Self::inner_init_msm(domain_size, Some(pool), Some(stream))?;
Self::inner_init_msm(domain_size, None, Some(pool), Some(stream))?;
stream.sync().unwrap();
Ok(())
}

fn inner_init_msm(
domain_size: usize,
crs: Option<Crs<CompactBn256, CrsForMonomialForm>>,
pool: Option<bc_mem_pool>,
stream: Option<bc_stream>,
) -> CudaResult<()> {
Expand All @@ -205,7 +219,10 @@ impl<const N: usize> DeviceContext<N> {
init_msm_result_mempool();
// MSM impl requires bases to be located in a buffer that is
// multiple of the domain_size
let crs = init_compact_crs(&bellman::worker::Worker::new(), domain_size);
let crs = match crs {
Some(preloaded_crs) => preloaded_crs,
None => init_compact_crs(&bellman::worker::Worker::new(), domain_size),
};
let num_bases = MAX_COMBINED_DEGREE_FACTOR * domain_size;
assert!(crs.g1_bases.len() >= num_bases);
let bases = match (pool, stream) {
Expand Down
8 changes: 4 additions & 4 deletions crates/fflonk/src/convenience.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use bellman::{
kate_commitment::{Crs, CrsForMonomialForm},
plonk::{
better_better_cs::cs::{
Circuit, SynthesisModeGenerateSetup, SynthesisModeProve, SynthesisModeTesting,
Circuit, SynthesisModeProve, SynthesisModeTesting,
},
commitments::transcript::keccak_transcript::RollingKeccakTranscript,
},
Expand All @@ -14,8 +14,8 @@ use circuit_definitions::circuit_definitions::aux_layer::{
};
use fflonk::{FflonkAssembly, L1_VERIFIER_DOMAIN_SIZE_LOG};

pub type FflonkSnarkVerifierCircuitDeviceSetup =
FflonkDeviceSetup<Bn256, FflonkSnarkVerifierCircuit>;
pub type FflonkSnarkVerifierCircuitDeviceSetup<A: HostAllocator = GlobalHost> =
FflonkDeviceSetup<Bn256, FflonkSnarkVerifierCircuit, A>;

use super::*;

Expand Down Expand Up @@ -190,7 +190,7 @@ pub fn precompute_and_save_setup_and_vk_for_fflonk_snark_circuit(

println!("Generating fflonk setup data on the device");
let device_setup =
FflonkSnarkVerifierCircuitDeviceSetup::create_setup_on_device(&circuit).unwrap();
FflonkSnarkVerifierCircuitDeviceSetup::<GlobalHost>::create_setup_on_device(&circuit).unwrap();
let setup_file_path = format!("{}/final_snark_device_setup.bin", path);
println!("Saving setup into file {setup_file_path}");
let device_setup_file = std::fs::File::create(&setup_file_path).unwrap();
Expand Down
4 changes: 3 additions & 1 deletion crates/fflonk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub use gpu_ffi;
use gpu_ffi::{bc_event, bc_mem_pool, bc_stream};
use std::alloc::Allocator;

pub use context::{DeviceContext, DeviceContextWithSingleDevice};
pub use context::{init_compact_crs, DeviceContext, DeviceContextWithSingleDevice};
pub use fflonk::MAX_COMBINED_DEGREE_FACTOR;

pub use convenience::FflonkSnarkVerifierCircuitDeviceSetup;
Expand All @@ -74,3 +74,5 @@ pub type FflonkSnarkVerifierCircuit = ZkSyncSnarkWrapperCircuitNoLookupCustomGat
pub type FflonkSnarkVerifierCircuitVK = FflonkVerificationKey<Bn256, FflonkSnarkVerifierCircuit>;
pub type FflonkSnarkVerifierCircuitProof = FflonkProof<Bn256, FflonkSnarkVerifierCircuit>;
pub type FflonkSnarkVerifierCircuitSetup = FflonkSetup<Bn256, FflonkSnarkVerifierCircuit>;

pub use allocator::{HostAllocator, GlobalHost};
72 changes: 56 additions & 16 deletions crates/fflonk/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,25 +254,24 @@ impl<E: Engine, C: Circuit<E>, A: HostAllocator> FflonkDeviceSetup<E, C, A> {
assert_eq!(num_polys, 5);
let mut main_gate_selector_monomials = vec![];
for _ in 0..num_polys {
let num_values = reader.read_u64::<BigEndian>()?;
let mut coeffs = Vec::with_capacity_in(num_values as usize, A::default());
for _ in 0..num_values {
let el = read_fr(&mut reader)?;
coeffs.push(el);
}
let coeffs = read_raw_fr_vec::<_, _, A>(&mut reader)?;
main_gate_selector_monomials.push(coeffs);
}

let num_polys = reader.read_u64::<BigEndian>()?;
assert_eq!(num_polys, 3);
let mut variable_indexes = vec![];
for _ in 0..num_polys {
let num_values = reader.read_u64::<BigEndian>()?;
let mut indexes = Vec::with_capacity_in(num_values as usize, A::default());
for _ in 0..num_values {
let el = reader.read_u32::<BigEndian>()?;
indexes.push(el);
}
let num_values = reader.read_u64::<BigEndian>()? as usize;
let mut indexes = Vec::with_capacity_in(num_values, A::default());
let indexes_buf = unsafe {
indexes.set_len(num_values);
std::slice::from_raw_parts_mut(
indexes.as_mut_ptr() as *mut u8,
num_values * std::mem::size_of::<u32>(),
)
};
reader.read_exact(indexes_buf)?;
variable_indexes.push(indexes);
}

Expand All @@ -293,17 +292,58 @@ impl<E: Engine, C: Circuit<E>, A: HostAllocator> FflonkDeviceSetup<E, C, A> {
use byteorder::{BigEndian, WriteBytesExt};
writer.write_u64::<BigEndian>(self.main_gate_selector_monomials.len() as u64)?;
for mon in self.main_gate_selector_monomials.iter() {
write_fr_vec(&mon, &mut writer)?;
write_raw_fr_slice(&mon, &mut writer)?;
}
writer.write_u64::<BigEndian>(self.variable_indexes.len() as u64)?;
for col in self.variable_indexes.iter() {
writer.write_u64::<BigEndian>(col.len() as u64)?;
for el in col {
writer.write_u32::<BigEndian>(*el)?;
}
let buf = unsafe {
std::slice::from_raw_parts(
col.as_ptr() as *mut u8,
col.len() * std::mem::size_of::<u32>(),
)
};
writer.write_all(buf)?;
}
write_curve_affine(&self.c0_commitment, &mut writer)?;
write_curve_affine(&self.g2_elems[0], &mut writer)?;
write_curve_affine(&self.g2_elems[1], writer)
}
}

/// Reads a length-prefixed vector of field elements stored as raw bytes.
///
/// Expected layout: a big-endian `u32` element count, followed by
/// `count * size_of::<F>()` bytes that are copied verbatim into the vector's
/// storage. No per-element decoding or validation is performed, so the bytes
/// must already be in the in-memory representation of `F`.
/// NOTE(review): presumably the input comes from `write_raw_fr_slice` on a
/// machine with the same endianness and `F` layout — confirm.
///
/// # Errors
/// Returns any I/O error reported by `src`, or `InvalidData` if the byte
/// length implied by the element count does not fit in `usize`.
pub fn read_raw_fr_vec<F: PrimeField, R: std::io::Read, A: Allocator + Default>(
mut src: R,
) -> std::io::Result<Vec<F, A>> {
use byteorder::{BigEndian, ReadBytesExt};
let num_values = src.read_u32::<BigEndian>()? as usize;
let num_bytes = num_values
.checked_mul(std::mem::size_of::<F>())
.ok_or_else(|| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
"field element vector byte length overflows usize",
)
})?;
let mut values: Vec<F, A> = Vec::with_capacity_in(num_values, A::default());
// SAFETY: the allocation holds at least `num_values` elements, i.e.
// `num_bytes` bytes, and `u8` has no alignment requirement. The bytes are
// uninitialized until `read_exact` fills them, so `src` must only write
// into the buffer. The length is published only after every byte has been
// written, so an I/O error never exposes or drops uninitialized elements.
unsafe {
let buf = std::slice::from_raw_parts_mut(values.as_mut_ptr() as *mut u8, num_bytes);
src.read_exact(buf)?;
values.set_len(num_values);
}

Ok(values)
}

/// Writes a slice of field elements as a length-prefixed raw byte dump.
///
/// Emits the element count as a big-endian `u32`, followed by the slice's
/// backing memory (`len * size_of::<F>()` bytes) exactly as it sits in
/// memory. No per-element encoding is applied, so the output depends on the
/// in-memory representation of `F` on the writing machine.
///
/// # Errors
/// Returns `InvalidInput` if the slice holds `u32::MAX` or more elements,
/// since the count would not fit the length prefix. Otherwise returns any
/// I/O error reported by `dst`.
pub fn write_raw_fr_slice<F: PrimeField, W: std::io::Write>(
src_values: &[F],
mut dst: W,
) -> std::io::Result<()> {
use byteorder::{BigEndian, WriteBytesExt};
let num_values = src_values.len();
if num_values >= u32::MAX as usize {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"too many field elements for a u32 length prefix",
));
}
dst.write_u32::<BigEndian>(num_values as u32)?;
// SAFETY: the pointer and byte length describe exactly the memory borrowed
// by `src_values`, which stays alive and unmodified for this call. Only a
// shared view is created because the data is borrowed immutably.
// NOTE(review): assumes `F` has no padding bytes — confirm.
let buf = unsafe {
std::slice::from_raw_parts(
src_values.as_ptr() as *const u8,
std::mem::size_of_val(src_values),
)
};
dst.write_all(buf)?;

Ok(())
}
5 changes: 2 additions & 3 deletions crates/gpu-prover/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ name = "zksync-gpu-prover"
description = "ZKsync GPU prover utilities"

[dependencies]
franklin-crypto = { workspace = true, optional = true }
franklin-crypto = { workspace = true, default-features = true, optional = true }
gpu-ffi.workspace = true

crossbeam = "0.8"
Expand All @@ -23,6 +23,5 @@ bit-vec = "0.6"
serde = {version = "1", features = ["derive", "rc"]}

[features]
default = ["allocator"]
no_allocator = ["franklin-crypto"]
default = ["franklin-crypto"]
allocator = ["franklin-crypto/allocator"]