diff --git a/crates/gpu-prover/src/cuda_bindings/async_vec.rs b/crates/gpu-prover/src/cuda_bindings/async_vec.rs index 2015794..2536149 100644 --- a/crates/gpu-prover/src/cuda_bindings/async_vec.rs +++ b/crates/gpu-prover/src/cuda_bindings/async_vec.rs @@ -3,11 +3,12 @@ use bellman::PrimeField; use core::ops::Range; use std::io::{Read, Write}; -pub struct AsyncVec { +pub struct AsyncVec { #[cfg(feature = "allocator")] - pub values: Option>, + values: Option>, #[cfg(not(feature = "allocator"))] - pub values: Option>, + values: Option>, + pub(crate) data_is_set: bool, pub(crate) read_event: Event, pub(crate) write_event: Event, } @@ -17,10 +18,10 @@ use std::fmt; macro_rules! impl_async_vec { (impl AsyncVec $inherent:tt) => { #[cfg(feature = "allocator")] - impl AsyncVec $inherent + impl AsyncVec $inherent #[cfg(not(feature = "allocator"))] - impl AsyncVec $inherent + impl AsyncVec $inherent }; } @@ -37,17 +38,20 @@ impl_async_vec! { Self { values: Some(values), + data_is_set: false, read_event: Event::new(), write_event: Event::new(), } } pub fn get_values(&self) -> GpuResult<&[T]> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); self.write_event.sync()?; Ok(self.values.as_ref().expect("async_vec inner is none")) } pub fn get_values_mut(&mut self) -> GpuResult<&mut [T]> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); self.read_event.sync()?; self.write_event.sync()?; Ok(self.values.as_mut().expect("async_vec inner is none")) @@ -60,6 +64,7 @@ impl_async_vec! { this_range: Range, other_range: Range, ) -> GpuResult<()> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); assert_eq!(this_range.len(), other_range.len()); let length = std::mem::size_of::() * this_range.len(); set_device(ctx.device_id())?; @@ -76,6 +81,7 @@ impl_async_vec! { ctx.h2d_stream().inner, ) }; + other.data_is_set = true; if result != 0 { return Err(GpuError::AsyncH2DErr(result)); @@ -94,6 +100,7 @@ impl_async_vec! { this_range: Range, other_range: Range, ) -> GpuResult<()> { + assert!(other.data_is_set, "DeviceBuf should be filled with some data"); assert_eq!(this_range.len(), other_range.len()); let length = std::mem::size_of::() * this_range.len(); set_device(ctx.device_id())?; @@ -117,6 +124,7 @@ impl_async_vec! { self.write_event.record(ctx.d2h_stream())?; other.read_event.record(ctx.d2h_stream())?; + self.data_is_set = true; Ok(()) } @@ -126,6 +134,7 @@ impl_async_vec! { } #[cfg(feature = "allocator")] pub fn into_inner(mut self) -> GpuResult> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); self.read_event.sync()?; self.write_event.sync()?; @@ -134,6 +143,7 @@ impl_async_vec! { #[cfg(not(feature = "allocator"))] pub fn into_inner(mut self) -> GpuResult> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); self.read_event.sync()?; self.write_event.sync()?; @@ -156,26 +166,59 @@ impl_async_vec! { self.values.as_mut().expect("async_vec inner is none")[range].as_mut_ptr() } - pub fn zeroize(&mut self){ - let unit_len = std::mem::size_of::(); - let total_len = unit_len * self.len(); - let dst = self.as_mut_ptr(0..self.len()) as *mut u8; - unsafe{std::ptr::write_bytes(dst, 0, total_len)}; + pub fn fill(&mut self, value: T) -> GpuResult<()> { + self.read_event.sync()?; + self.write_event.sync()?; + + self.values.as_mut().unwrap().fill(value); + self.data_is_set = true; + + Ok(()) + } + + pub fn copy_from_slice(&mut self, src: &[T]) -> GpuResult<()> { + self.read_event.sync()?; + self.write_event.sync()?; + + // copy_from_slice checks the equality of lengths + self.values.as_mut().unwrap().copy_from_slice(src); + self.data_is_set = true; + + Ok(()) + } + + pub fn async_copy_from_slice(&mut self, worker: &Worker, src: &[T]) -> GpuResult<()> + where T: Send + Sync + { + self.read_event.sync()?; + self.write_event.sync()?; + + // async_copy checks the equality of lengths + async_copy( + worker, + self.values.as_mut().unwrap(), + src, + ); + self.data_is_set = true; + + Ok(()) } } } #[cfg(feature = "allocator")] -impl fmt::Debug for AsyncVec { +impl fmt::Debug for AsyncVec { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); f.debug_struct("AsyncVec") .field("Values", &self.get_values().unwrap()) .finish() } } #[cfg(not(feature = "allocator"))] -impl fmt::Debug for AsyncVec { +impl fmt::Debug for AsyncVec { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); f.debug_struct("AsyncVec") .field("Values", &self.get_values().unwrap()) .finish() @@ -183,20 +226,22 @@ impl fmt::Debug for AsyncVec { } #[cfg(feature = "allocator")] -impl From> for AsyncVec { +impl From> for AsyncVec { fn from(values: Vec) -> Self { Self { values: Some(values), + data_is_set: true, read_event: Event::new(), write_event: Event::new(), } } } #[cfg(not(feature = "allocator"))] -impl From> for AsyncVec { +impl From> for AsyncVec { fn from(values: Vec) -> Self { Self { values: Some(values), + data_is_set: true, read_event: Event::new(), write_event: Event::new(), } @@ -204,21 +249,21 @@ impl From> for AsyncVec { } #[cfg(feature = "allocator")] -impl From> for Vec { +impl From> for Vec { fn from(vector: AsyncVec) -> Self { vector.into_inner().unwrap() } } #[cfg(not(feature = "allocator"))] -impl From> for Vec { +impl From> for Vec { fn from(vector: AsyncVec) -> Self { vector.into_inner().unwrap() } } #[cfg(feature = "allocator")] -impl Drop for AsyncVec { +impl Drop for AsyncVec { fn drop(&mut self) { self.read_event.sync().unwrap(); self.write_event.sync().unwrap(); @@ -226,7 +271,7 @@ impl Drop for AsyncVec { } #[cfg(not(feature = "allocator"))] -impl Drop for AsyncVec { +impl Drop for AsyncVec { fn drop(&mut self) { self.read_event.sync().unwrap(); self.write_event.sync().unwrap(); @@ -263,6 +308,7 @@ impl_async_vec_for_field! { } pub fn to_bytes(&self, dst: &mut [u8]) -> GpuResult<()> { + assert!(self.data_is_set, "AsyncVec should be filled with some data"); let length = self.len(); let F_SIZE = F::zero().into_raw_repr().as_ref().len() * 8; assert_eq!(length * F_SIZE, dst.len(), "Wrong destination length"); @@ -306,6 +352,8 @@ impl_async_vec_for_field! { ) }; + self.data_is_set = true; + Ok(()) } } diff --git a/crates/gpu-prover/src/cuda_bindings/device_arithmetic.rs b/crates/gpu-prover/src/cuda_bindings/device_arithmetic.rs index 2da79eb..90e3b12 100644 --- a/crates/gpu-prover/src/cuda_bindings/device_arithmetic.rs +++ b/crates/gpu-prover/src/cuda_bindings/device_arithmetic.rs @@ -230,6 +230,8 @@ impl DeviceBuf { ); let constant = constant.expect("constant should be Some in SetValue operation"); + self.data_is_set = true; + ff_set_value( self.as_mut_ptr(range) as *mut c_void, &constant as *const Fr as *const c_void, @@ -245,8 +247,10 @@ impl DeviceBuf { return Err(GpuError::ArithmeticErr(result)); } + assert!(self.data_is_set, "DeviceBuf should be filled with some data"); self.write_event.record(&ctx.exec_stream)?; if let Some(other) = other { + assert!(other.data_is_set, "DeviceBuf should be filled with some data"); other.read_event.record(&ctx.exec_stream)?; } @@ -263,6 +267,8 @@ impl DeviceBuf { shift: usize, inverse: bool, ) -> GpuResult<()> { + assert!(self.data_is_set, "DeviceBuf should be filled with some data"); + assert!( ctx.ff, "ff is not set up on GpuContext with id {}", diff --git a/crates/gpu-prover/src/cuda_bindings/device_buf.rs b/crates/gpu-prover/src/cuda_bindings/device_buf.rs index 694497b..427b29e 100644 --- a/crates/gpu-prover/src/cuda_bindings/device_buf.rs +++ b/crates/gpu-prover/src/cuda_bindings/device_buf.rs @@ -1,11 +1,12 @@ use super::*; use core::ops::Range; -pub struct DeviceBuf { +pub struct DeviceBuf { pub(crate) ptr: *mut T, pub(crate) len: usize, pub(crate) device_id: usize, + pub(crate) data_is_set: bool, pub(crate) is_static_mem: bool, pub(crate) is_freed: bool, @@ -13,7 +14,7 @@ pub struct DeviceBuf { pub(crate) write_event: Event, } -impl DeviceBuf { +impl DeviceBuf { pub fn alloc_static(ctx: &GpuContext, len: usize) -> GpuResult { set_device(ctx.device_id())?; assert!(ctx.mem_pool.is_none(), "mem pool is allocated"); @@ -32,6 +33,7 @@ impl DeviceBuf { len: len, device_id: ctx.device_id(), + data_is_set: false, is_static_mem: true, is_freed: false, @@ -64,6 +66,7 @@ impl DeviceBuf { len: len, device_id: ctx.device_id(), + data_is_set: false, is_static_mem: false, is_freed: false, @@ -96,6 +99,7 @@ impl DeviceBuf { len: len, device_id: ctx.device_id(), + data_is_set: false, is_static_mem: false, is_freed: false, @@ -127,6 +131,7 @@ impl DeviceBuf { len: chunk_len, device_id: self.device_id, + data_is_set: self.data_is_set, is_static_mem: self.is_static_mem, is_freed: true, @@ -174,6 +179,7 @@ impl DeviceBuf { ctx.exec_stream.wait(other.read_event())?; ctx.exec_stream.wait(other.write_event())?; + assert!(self.data_is_set, "DeviceBuf should be filled with some data"); let result = unsafe { bc_memcpy_async( other.as_mut_ptr(other_range) as *mut c_void, @@ -182,6 +188,7 @@ impl DeviceBuf { ctx.exec_stream().inner, ) }; + other.data_is_set = true; if result != 0 { return Err(GpuError::AsyncH2DErr(result)); @@ -214,6 +221,7 @@ impl DeviceBuf { length as u64, ctx.h2d_stream().inner, ); + self.data_is_set = true; if result != 0 { return Err(GpuError::AsyncMemcopyErr(result)); @@ -287,7 +295,7 @@ impl DeviceBuf { } } -impl Drop for DeviceBuf { +impl Drop for DeviceBuf { fn drop(&mut self) { if !self.is_freed { self.read_event.sync().unwrap(); diff --git a/crates/gpu-prover/src/cuda_bindings/device_heavy_ops.rs b/crates/gpu-prover/src/cuda_bindings/device_heavy_ops.rs index a2ed075..e39f762 100644 --- a/crates/gpu-prover/src/cuda_bindings/device_heavy_ops.rs +++ b/crates/gpu-prover/src/cuda_bindings/device_heavy_ops.rs @@ -17,6 +17,8 @@ impl DeviceBuf { let mut result = DeviceBuf::::async_alloc_in_exec(ctx, 254)?; ctx.exec_stream.wait(self.write_event())?; + assert!(self.data_is_set, "DeviceBuf should be filled with some data"); + let null_ptr = std::ptr::null_mut() as *mut c_void; let null_event = bc_event { handle: null_ptr }; @@ -44,6 +46,7 @@ impl DeviceBuf { self.read_event.record(ctx.exec_stream())?; result.write_event.record(ctx.exec_stream())?; + result.data_is_set = true; Ok(result) } @@ -60,6 +63,8 @@ impl DeviceBuf { let mut result = DeviceBuf::::async_alloc_in_exec(ctx, 1)?; ctx.exec_stream.wait(self.write_event())?; + assert!(self.data_is_set, "DeviceBuf should be filled with some data"); + let cfg = ff_poly_evaluate_configuration { mem_pool: ctx.mem_pool.unwrap(), stream: ctx.exec_stream.inner, @@ -78,6 +83,7 @@ impl DeviceBuf { self.read_event.record(ctx.exec_stream())?; result.write_event.record(ctx.exec_stream())?; + result.data_is_set = true; Ok(result) } @@ -97,6 +103,8 @@ impl DeviceBuf { for buffer in buffers.iter() { ctx.exec_stream.wait(buffer.write_event())?; ctx.exec_stream.wait(buffer.read_event())?; + + assert!(buffer.data_is_set, "DeviceBuf should be filled with some data"); } for i in 0..(buffers.len() - 1) { @@ -221,6 +229,8 @@ impl DeviceBuf { ctx[buffer_idx].exec_stream.wait(buffer.write_event())?; ctx[buffer_idx].exec_stream.wait(buffer.read_event())?; + assert!(buffer.data_is_set, "DeviceBuf should be filled with some data"); + let d_scalars = buffer.as_ptr(0..buffer.len()); let cfg = ntt_configuration { @@ -317,6 +327,8 @@ impl DeviceBuf { ctx[ctx_id].exec_stream.wait(buffer.write_event())?; ctx[ctx_id].exec_stream.wait(buffer.read_event())?; + assert!(buffer.data_is_set, "DeviceBuf should be filled with some data"); + let d_scalars = buffer.as_ptr(0..buffer.len()); let cfg = ntt_configuration { @@ -380,6 +392,7 @@ impl DeviceBuf { for buffer in buffers.iter() { ctx.exec_stream.wait(buffer.write_event())?; ctx.exec_stream.wait(buffer.read_event())?; + assert!(buffer.data_is_set, "DeviceBuf should be filled with some data"); } for i in 0..(buffers.len() - 1) { @@ -445,6 +458,7 @@ impl DeviceBuf { set_device(device_id)?; ctx[buffer_idx].exec_stream.wait(buffer.write_event())?; ctx[buffer_idx].exec_stream.wait(buffer.read_event())?; + assert!(buffer.data_is_set, "DeviceBuf should be filled with some data"); let d_scalars = buffer.as_ptr(0..buffer.len()); diff --git a/crates/gpu-prover/src/memory_manager/copying_operations.rs b/crates/gpu-prover/src/memory_manager/copying_operations.rs index ebc280e..7ea100f 100644 --- a/crates/gpu-prover/src/memory_manager/copying_operations.rs +++ b/crates/gpu-prover/src/memory_manager/copying_operations.rs @@ -59,11 +59,8 @@ impl DeviceMemoryManager { // .get_values_mut() // .unwrap() // .copy_from_slice(poly); - async_copy( - worker, - self.host_slots[host_idx].0.get_values_mut().unwrap(), - poly, - ); + + self.host_slots[host_idx].0.async_copy_from_slice(worker, poly).unwrap(); let idx = self.free_slot_idx().expect("No free slots"); @@ -123,7 +120,7 @@ impl DeviceMemoryManager { async_copy( worker, poly, - self.host_slots[host_idx].0.get_values_mut().unwrap(), + self.host_slots[host_idx].0.get_values().unwrap(), ); Ok(()) diff --git a/crates/gpu-prover/src/memory_manager/proving_operations/columns_sorting.rs b/crates/gpu-prover/src/memory_manager/proving_operations/columns_sorting.rs index 4abdda4..dc0c96e 100644 --- a/crates/gpu-prover/src/memory_manager/proving_operations/columns_sorting.rs +++ b/crates/gpu-prover/src/memory_manager/proving_operations/columns_sorting.rs @@ -278,6 +278,8 @@ fn sort_indexes( stream.wait(buffers.2.write_event())?; stream.wait(buffers.2.read_event())?; + assert!(buffers.1.data_is_set, "DeviceBuf should be filled with some data"); + unsafe { let stream = manager.ctx[ctx_id].exec_stream.inner; let mem_pool = manager.ctx[ctx_id] @@ -303,6 +305,7 @@ fn sort_indexes( let mut stream = &mut manager.ctx[ctx_id].exec_stream; buffers.1.read_event.record(stream)?; buffers.2.write_event.record(stream)?; + buffers.2.data_is_set = true; Ok(()) } @@ -314,6 +317,9 @@ fn assign_columns( offset_in_result: usize, ctx_id: usize, ) -> GpuResult<()> { + assert!(buffers.0.data_is_set, "DeviceBuf should be filled with some data"); + assert!(buffers.2.data_is_set, "DeviceBuf should be filled with some data"); + let slot_idx = manager.get_slot_idx(PolyId::S, PolyForm::Values).unwrap(); let device_id = manager.ctx[ctx_id].device_id(); let stream = &mut manager.ctx[ctx_id].exec_stream; @@ -352,6 +358,7 @@ fn assign_columns( res_buff.write_event.record(stream)?; buffers.2.read_event.record(stream)?; buffers.0.read_event.record(stream)?; + res_buff.data_is_set = true; } offset += range.len(); diff --git a/crates/gpu-prover/src/memory_manager/proving_operations/compute_assigments_and_permutations.rs b/crates/gpu-prover/src/memory_manager/proving_operations/compute_assigments_and_permutations.rs index a9afa5a..958971c 100644 --- a/crates/gpu-prover/src/memory_manager/proving_operations/compute_assigments_and_permutations.rs +++ b/crates/gpu-prover/src/memory_manager/proving_operations/compute_assigments_and_permutations.rs @@ -89,6 +89,7 @@ fn create_buffers_for_computing_assigments_and_permutations( len: MC::FULL_SLOT_SIZE, device_id: device_id_0, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -107,6 +108,7 @@ fn create_buffers_for_computing_assigments_and_permutations( len: 4 * MC::FULL_SLOT_SIZE, device_id: device_id_1, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -121,6 +123,7 @@ fn create_buffers_for_computing_assigments_and_permutations( len: 4 * MC::FULL_SLOT_SIZE, device_id: device_id_1, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -134,6 +137,7 @@ fn create_buffers_for_computing_assigments_and_permutations( len: 4, device_id: device_id_1, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -153,6 +157,7 @@ fn create_buffers_for_computing_assigments_and_permutations( len: assignments_len + 1, device_id: device_id_0, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -191,9 +196,10 @@ fn set_initial_values( let num_non_residues = 4; let mut host_non_residues = AsyncVec::allocate_new(num_non_residues); - let mut host_buff = host_non_residues.get_values_mut()?; - host_buff[0] = Fr::one(); - host_buff[1..].copy_from_slice(&make_non_residues::(num_non_residues - 1)); + let mut non_residues_buff = vec![Fr::one(); num_non_residues]; + non_residues_buff[1..].copy_from_slice(&make_non_residues::(num_non_residues - 1)); + + host_non_residues.copy_from_slice(&non_residues_buff)?; non_residues.async_copy_from_host( &mut manager.ctx[ctx_id_1], diff --git a/crates/gpu-prover/src/memory_manager/proving_operations/compute_permutations.rs b/crates/gpu-prover/src/memory_manager/proving_operations/compute_permutations.rs index 12534d0..a2bf61a 100644 --- a/crates/gpu-prover/src/memory_manager/proving_operations/compute_permutations.rs +++ b/crates/gpu-prover/src/memory_manager/proving_operations/compute_permutations.rs @@ -61,6 +61,7 @@ fn create_buffers_for_computing_assigments( len: 4 * MC::FULL_SLOT_SIZE, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -74,6 +75,7 @@ fn create_buffers_for_computing_assigments( len: 4 * MC::FULL_SLOT_SIZE, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -87,6 +89,7 @@ fn create_buffers_for_computing_assigments( len: 4, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -99,9 +102,10 @@ fn create_buffers_for_computing_assigments( let num_non_residues = 4; let mut host_non_residues = AsyncVec::allocate_new(num_non_residues); - let mut host_buff = host_non_residues.get_values_mut()?; - host_buff[0] = Fr::one(); - host_buff[1..].copy_from_slice(&make_non_residues::(num_non_residues - 1)); + let mut non_residues_buff = vec![Fr::one(); num_non_residues]; + non_residues_buff[1..].copy_from_slice(&make_non_residues::(num_non_residues - 1)); + + host_non_residues.copy_from_slice(&non_residues_buff)?; non_residues.async_copy_from_host( &mut manager.ctx[ctx_id], @@ -254,6 +258,7 @@ pub fn compute_permutation_polynomials_on_device( } permutations.write_event.record(&stream)?; + permutations.data_is_set = true; Ok(()) } diff --git a/crates/gpu-prover/src/memory_manager/proving_operations/compute_state_values.rs b/crates/gpu-prover/src/memory_manager/proving_operations/compute_state_values.rs index 54649af..480db61 100644 --- a/crates/gpu-prover/src/memory_manager/proving_operations/compute_state_values.rs +++ b/crates/gpu-prover/src/memory_manager/proving_operations/compute_state_values.rs @@ -71,6 +71,7 @@ fn create_buffers_for_computing_assigments( len: MC::FULL_SLOT_SIZE, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -85,6 +86,7 @@ fn create_buffers_for_computing_assigments( len: 4 * MC::FULL_SLOT_SIZE, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -99,6 +101,7 @@ fn create_buffers_for_computing_assigments( len: assignments_len + 1, device_id, + data_is_set: false, is_static_mem: true, is_freed: true, @@ -255,6 +258,8 @@ pub fn assign_variables( stream.wait(state_polys.write_event())?; stream.wait(variables.write_event())?; stream.wait(assigments.write_event())?; + assert!(variables.data_is_set, "DeviceBuf should be filled with some data"); + assert!(assigments.data_is_set, "DeviceBuf should be filled with some data"); let length = variables.len(); let result = state_polys.as_mut_ptr(num_input_gates..length) as *mut c_void; @@ -269,6 +274,7 @@ pub fn assign_variables( }; } state_polys.write_event.record(stream)?; + state_polys.data_is_set = true; Ok(()) } diff --git a/crates/gpu-prover/src/memory_manager/proving_operations/create_selectors.rs b/crates/gpu-prover/src/memory_manager/proving_operations/create_selectors.rs index 8991f50..5bfe7e7 100644 --- a/crates/gpu-prover/src/memory_manager/proving_operations/create_selectors.rs +++ b/crates/gpu-prover/src/memory_manager/proving_operations/create_selectors.rs @@ -329,6 +329,7 @@ pub fn create_selectors_inner( result_range: Range, ) -> GpuResult<()> { assert!(result_range.len() <= bitvec.len() * 256); + assert!(bitvec.data_is_set, "DeviceBuf should be filled with some data"); ctx.exec_stream.wait(result.read_event())?; ctx.exec_stream.wait(result.write_event())?; @@ -352,6 +353,7 @@ pub fn create_selectors_inner( result.write_event.record(ctx.exec_stream())?; bitvec.read_event.record(ctx.exec_stream())?; + result.data_is_set = true; Ok(()) } diff --git a/crates/gpu-prover/src/setup_precomputations.rs b/crates/gpu-prover/src/setup_precomputations.rs index 75bf763..538b3f0 100644 --- a/crates/gpu-prover/src/setup_precomputations.rs +++ b/crates/gpu-prover/src/setup_precomputations.rs @@ -56,10 +56,10 @@ cfg_if! { macro_rules! impl_async_setup { (impl AsyncSetup $inherent:tt) => { - #[cfg(feature = "allocator")] - impl AsyncSetup $inherent + // #[cfg(feature = "allocator")] + // impl AsyncSetup $inherent - #[cfg(not(feature = "allocator"))] + // #[cfg(not(feature = "allocator"))] impl AsyncSetup $inherent }; } @@ -94,7 +94,7 @@ impl_async_setup! { pub fn zeroize(&mut self){ for poly in self.gate_setup_monomials.iter_mut() { - poly.zeroize(); + poly.fill(Fr::zero()); } for poly in self.gate_selectors_bitvecs.iter_mut() { @@ -103,13 +103,13 @@ impl_async_setup! { } for poly in self.lookup_tables_values.iter_mut() { - poly.zeroize() + poly.fill(Fr::zero()); } self.lookup_selector_bitvec.set_all(); self.lookup_selector_bitvec.negate(); - self.lookup_table_type_monomial.zeroize(); + self.lookup_table_type_monomial.fill(Fr::zero()); } pub fn write( @@ -267,7 +267,9 @@ impl_async_setup! { manager.copy_to_device_with_host_slot(worker, values.as_ref(), PolyId::Enumerated(i), PolyForm::Values); manager.multigpu_ifft(PolyId::Enumerated(i), false); - manager.copy_from_device_with_host_slot(worker, self.gate_setup_monomials[i].get_values_mut()?, PolyId::Enumerated(i), PolyForm::Monomial); + + manager.copy_from_device_to_host_pinned(PolyId::Enumerated(i), PolyForm::Monomial)?; + self.gate_setup_monomials[i].async_copy_from_slice(worker, manager.get_host_slot_values(PolyId::Enumerated(i), PolyForm::Monomial)?)?; manager.free_host_slot(PolyId::Enumerated(i), PolyForm::Values); manager.free_host_slot(PolyId::Enumerated(i), PolyForm::Monomial); @@ -311,8 +313,9 @@ impl_async_setup! { let copy_end = copy_start + tails_len; for (i, tail) in table_tails.into_iter().enumerate() { + self.lookup_tables_values[i].fill(Fr::zero()); let values = self.lookup_tables_values[i].get_values_mut()?; - fill_with_zeros(worker, &mut values[..]); + // fill_with_zeros(worker, &mut values[..]); async_copy(worker, &mut values[copy_start..copy_end], &tail[..]); } @@ -339,7 +342,9 @@ impl_async_setup! { let poly = Polynomial::from_values(table_type_values).unwrap(); manager.copy_to_device_with_host_slot(worker, poly.as_ref(), PolyId::Enumerated(0), PolyForm::Values); manager.multigpu_ifft(PolyId::Enumerated(0), false); - manager.copy_from_device_with_host_slot(worker, self.lookup_table_type_monomial.get_values_mut()?, PolyId::Enumerated(0), PolyForm::Monomial); + + manager.copy_from_device_to_host_pinned(PolyId::Enumerated(0), PolyForm::Monomial)?; + self.lookup_table_type_monomial.async_copy_from_slice(worker, manager.get_host_slot_values(PolyId::Enumerated(0), PolyForm::Monomial)?)?; manager.free_host_slot(PolyId::Enumerated(0), PolyForm::Values); manager.free_host_slot(PolyId::Enumerated(0), PolyForm::Monomial);