Skip to content

Commit e8d959b

Browse files
committed
resolve conflicts
1 parent ab43d61 commit e8d959b

File tree

10 files changed

+436
-207
lines changed

10 files changed

+436
-207
lines changed
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
use std::{
2+
ptr::NonNull,
3+
sync::{Arc, Mutex},
4+
};
5+
6+
use super::*;
7+
8+
/// Shared handle to a raw byte region.
///
/// The region's `NonNull<[u8]>` lives behind an `Arc`, so cloning the handle
/// only bumps a reference count; every clone refers to the same bytes.
#[derive(Clone)]
pub(crate) struct UnsafeNonNullPtr(pub(crate) Arc<NonNull<[u8]>>);

// SAFETY(review): `NonNull` is neither `Send` nor `Sync` by itself. These impls
// assert that whoever uses the handle coordinates access to the region; nothing
// in this type enforces that — confirm against the call sites.
unsafe impl Send for UnsafeNonNullPtr {}
unsafe impl Sync for UnsafeNonNullPtr {}

impl UnsafeNonNullPtr {
    /// Wraps `ptr` in a fresh reference-counted handle.
    pub(crate) fn new(ptr: NonNull<[u8]>) -> Self {
        let shared = Arc::new(ptr);
        Self(shared)
    }

    /// Address of the first byte of the region, as a read-only raw pointer.
    pub(crate) fn as_ptr(&self) -> *const u8 {
        let region: *mut [u8] = self.0.as_ptr();
        // Drop the slice length; only the start address is wanted.
        region.cast::<u8>()
    }

    /// Address of the first byte of the region, as a writable raw pointer.
    pub(crate) fn as_mut_ptr(&mut self) -> *mut u8 {
        let region: *mut [u8] = self.0.as_ptr();
        region.cast::<u8>()
    }
}
25+
26+
#[derive(Clone)]
27+
pub(crate) struct StaticBitmapAllocator {
28+
pub(crate) memory: UnsafeNonNullPtr,
29+
pub(crate) memory_size: usize,
30+
pub(crate) block_size_in_bytes: usize,
31+
pub(crate) bitmap: Arc<Mutex<Vec<bool>>>,
32+
}
33+
34+
impl StaticBitmapAllocator {
35+
pub(crate) fn init(
36+
memory: NonNull<[u8]>,
37+
num_blocks: usize,
38+
block_size_in_bytes: usize,
39+
) -> Self {
40+
let memory_size_in_bytes = num_blocks * block_size_in_bytes;
41+
Self {
42+
memory: UnsafeNonNullPtr::new(memory),
43+
memory_size: memory_size_in_bytes,
44+
block_size_in_bytes,
45+
bitmap: Arc::new(Mutex::new(vec![false; num_blocks])),
46+
}
47+
}
48+
49+
pub(crate) fn as_ptr(&self) -> *const u8 {
50+
self.memory.as_ptr().cast()
51+
}
52+
53+
pub(crate) fn find_free_block(&self) -> Option<usize> {
54+
for (idx, entry) in self.bitmap.lock().unwrap().iter_mut().enumerate() {
55+
if !*entry {
56+
*entry = true;
57+
return Some(idx);
58+
}
59+
}
60+
None
61+
}
62+
63+
#[allow(unreachable_code)]
64+
pub(crate) fn find_adjacent_free_blocks(
65+
&self,
66+
requested_num_blocks: usize,
67+
) -> Option<std::ops::Range<usize>> {
68+
let mut bitmap = self.bitmap.lock().unwrap();
69+
if requested_num_blocks > bitmap.len() {
70+
return None;
71+
}
72+
let _range_of_blocks_found = false;
73+
let _found_range = 0..0;
74+
75+
let mut start = 0;
76+
let mut end = requested_num_blocks;
77+
let mut busy_block_idx = 0;
78+
loop {
79+
let mut has_busy_block = false;
80+
for (idx, sub_entry) in bitmap[start..end].iter().copied().enumerate() {
81+
if sub_entry {
82+
has_busy_block = true;
83+
busy_block_idx = start + idx;
84+
}
85+
}
86+
if !has_busy_block {
87+
for entry in bitmap[start..end].iter_mut() {
88+
*entry = true;
89+
}
90+
return Some(start..end);
91+
} else {
92+
start = busy_block_idx + 1;
93+
end = start + requested_num_blocks;
94+
if end > bitmap.len() {
95+
break;
96+
}
97+
}
98+
}
99+
// panic!("not found block {} {} {}", start, end, self.bitmap.len());
100+
None
101+
}
102+
103+
pub(crate) fn free_blocks(&self, index: usize, num_blocks: usize) {
104+
assert!(num_blocks > 0);
105+
let mut guard = self.bitmap.lock().unwrap();
106+
for i in index..index + num_blocks {
107+
guard[i] = false;
108+
}
109+
}
110+
111+
pub(crate) fn allocate(
112+
&self,
113+
layout: std::alloc::Layout,
114+
) -> CudaResult<std::ptr::NonNull<[u8]>> {
115+
let size = layout.size();
116+
assert!(size > 0);
117+
assert_eq!(size % self.block_size_in_bytes, 0);
118+
let num_blocks = size / self.block_size_in_bytes;
119+
120+
if size > self.block_size_in_bytes {
121+
if let Some(range) = self.find_adjacent_free_blocks(num_blocks) {
122+
let index = range.start;
123+
let offset = index * self.block_size_in_bytes;
124+
let ptr = unsafe { self.as_ptr().add(offset) };
125+
let ptr = unsafe { NonNull::new_unchecked(ptr as _) };
126+
return Ok(NonNull::slice_from_raw_parts(ptr, size));
127+
}
128+
panic!("allocation of {} blocks has failed", num_blocks);
129+
// return Err(CudaError::AllocationError(format!(
130+
// "allocation of {} blocks has failed",
131+
// num_blocks
132+
// )));
133+
}
134+
135+
if let Some(index) = self.find_free_block() {
136+
let offset = index * self.block_size_in_bytes;
137+
let ptr = unsafe { self.as_ptr().add(offset) };
138+
let ptr = unsafe { NonNull::new_unchecked(ptr as _) };
139+
Ok(NonNull::slice_from_raw_parts(ptr, size))
140+
} else {
141+
panic!("allocation of 1 block has failed");
142+
// return Err(CudaError::AllocationError(format!(
143+
// "allocation of 1 block has failed",
144+
// )));
145+
}
146+
}
147+
148+
pub(crate) fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout) {
149+
let size = layout.size();
150+
assert!(size > 0);
151+
assert_eq!(size % self.block_size_in_bytes, 0);
152+
let offset = unsafe { ptr.as_ptr().offset_from(self.as_ptr()) } as usize;
153+
if offset >= self.memory_size {
154+
return;
155+
}
156+
assert_eq!(offset % self.block_size_in_bytes, 0);
157+
let index = offset / self.block_size_in_bytes;
158+
let num_blocks = size / self.block_size_in_bytes;
159+
self.free_blocks(index, num_blocks);
160+
}
161+
}

crates/fflonk/src/allocator/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
use super::*;
2+
3+
mod bitmap;
4+
use bitmap::*;
5+
26
mod pinned;
37
pub use pinned::*;
48

@@ -7,3 +11,7 @@ pub use pool::*;
711

812
mod static_device;
913
pub use static_device::*;
14+
15+
16+
use std::ptr::NonNull;
17+
use bellman::bn256::Fr;

crates/fflonk/src/allocator/pinned.rs

Lines changed: 86 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,110 @@ use super::*;
33
// Both assembly and device setup have the ability to store data in pinned memory
44
// - Assembly uses it for the variables(7487741), state and setup columns
55
// - Device setup uses variable indexes and gate selectors
6-
static mut _STATIC_HOST_ALLOC: Option<GlobalHost> = None;
6+
// Process-wide pinned-host allocator slot: `None` until `init_static_host_alloc`
// stores an allocator, and emptied again by `free_static_host_alloc`.
// NOTE(review): this is a `static mut` with no synchronization visible in this
// file — confirm that init/free never race with readers.
static mut _STATIC_HOST_ALLOC: Option<GlobalStaticHost> = None;
77

8-
#[derive(Clone, Debug, Default, Eq, PartialEq)]
9-
pub struct GlobalHost;
8+
pub(crate) fn _static_host_alloc() -> GlobalStaticHost {
9+
unsafe {
10+
_STATIC_HOST_ALLOC
11+
.as_ref()
12+
.expect("initialize static host allocator")
13+
.clone()
14+
}
15+
}
1016

11-
impl GlobalHost {
12-
pub fn init(domain_size: usize) -> CudaResult<Self> {
13-
let num_variables = 0;
14-
let num_cols = 3;
17+
pub(crate) fn init_static_host_alloc(domain_size: usize) {
18+
unsafe {
19+
// Pinned memory could be initialized before device initialization
20+
if _STATIC_HOST_ALLOC.is_some() {
21+
println!("fflonk pinned memory already initialized, ignoring");
22+
return;
23+
}
24+
}
25+
// Bitmap allocator with small block size and high number of allocations doesn't make
26+
// sense, and doesn't give good runtime performance compared to default allocator.
27+
// However it provides satisfying improvement for 3 combined monomials, since prover
28+
// transfers them them back and forth in case of L4 devices.
29+
let num_blocks = 3;
30+
let block_size_in_bytes = 9 * 32 * domain_size;
31+
let allocator = GlobalStaticHost::init(num_blocks, block_size_in_bytes)
32+
.expect("initialize static allocator");
1533

16-
let size_of_indexes_in_bytes = 8 * num_cols * domain_size;
17-
let size_of_vars_in_bytes = 32 * num_variables;
34+
unsafe { _STATIC_HOST_ALLOC = Some(allocator) }
35+
}
1836

19-
let total_size_in_bytes = size_of_indexes_in_bytes + size_of_vars_in_bytes;
37+
pub(crate) fn free_static_host_alloc() {
38+
unsafe {
39+
if let Some(alloc) = _STATIC_HOST_ALLOC.take() {
40+
alloc.free().expect("Couldn't free static allocator");
41+
}
42+
}
43+
}
2044

21-
todo!()
45+
#[derive(Clone)]
46+
pub struct GlobalStaticHost(StaticBitmapAllocator);
47+
48+
impl Default for GlobalStaticHost {
49+
fn default() -> Self {
50+
_static_host_alloc()
2251
}
2352
}
2453

2554
/// Marker trait for allocators usable on the host side: an `Allocator` that is
/// also `Default`, `Clone`, `Send`, `Sync` and `'static`. It declares no methods.
pub trait HostAllocator: Allocator + Default + Clone + Send + Sync + 'static {}
2655

27-
unsafe impl Allocator for GlobalHost {
56+
impl GlobalStaticHost {
57+
pub fn init(num_blocks: usize, block_size_in_bytes: usize) -> CudaResult<Self> {
58+
assert_ne!(num_blocks, 0);
59+
60+
let memory_size_in_bytes = num_blocks * block_size_in_bytes;
61+
let memory = host_allocate(memory_size_in_bytes)
62+
.map(|ptr| unsafe { std::ptr::NonNull::new_unchecked(ptr as _) })
63+
.map(|ptr| std::ptr::NonNull::slice_from_raw_parts(ptr, memory_size_in_bytes))?;
64+
println!("allocated {memory_size_in_bytes} bytes on pinned host memory");
65+
let allocator = StaticBitmapAllocator::init(memory, num_blocks, block_size_in_bytes);
66+
67+
Ok(Self(allocator))
68+
}
69+
70+
pub(crate) fn free(self) -> CudaResult<()> {
71+
println!("freeing static cuda allocation");
72+
assert_eq!(std::sync::Arc::weak_count(&self.0.memory.0), 0);
73+
// TODO
74+
// assert_eq!(Arc::strong_count(&self.memory), 1);
75+
let StaticBitmapAllocator { mut memory, .. } = self.0;
76+
// let memory = Arc::try_unwrap(memory).expect("exclusive access");
77+
host_dealloc(memory.as_mut_ptr().cast())
78+
}
79+
}
80+
81+
unsafe impl Allocator for GlobalStaticHost {
2882
fn allocate(
2983
&self,
3084
layout: std::alloc::Layout,
85+
) -> Result<NonNull<[u8]>, std::alloc::AllocError> {
86+
self.0.allocate(layout).map_err(|_| std::alloc::AllocError)
87+
}
88+
89+
fn allocate_zeroed(
90+
&self,
91+
layout: std::alloc::Layout,
3192
) -> Result<std::ptr::NonNull<[u8]>, std::alloc::AllocError> {
32-
host_allocate(layout.size())
33-
.map(|ptr| unsafe { std::ptr::NonNull::new_unchecked(ptr as _) })
34-
.map(|ptr| std::ptr::NonNull::slice_from_raw_parts(ptr, layout.size()))
35-
.map_err(|_| std::alloc::AllocError)
93+
let ptr = self.allocate(layout)?;
94+
let num_bytes = layout.size();
95+
unsafe {
96+
std::ptr::write_bytes(ptr.as_ptr() as *mut u8, 0, layout.size());
97+
let result = gpu_ffi::bc_memset(ptr.as_ptr().cast(), 0, num_bytes as u64);
98+
if result != 0 {
99+
panic!("Couldn't allocate zeroed buffer")
100+
}
101+
}
102+
103+
Ok(ptr)
36104
}
37105

38106
unsafe fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout) {
39-
host_dealloc(ptr.as_ptr().cast()).expect("deallocate static buffer")
107+
self.0.deallocate(ptr, layout);
40108
}
41109
}
42110

43-
impl HostAllocator for GlobalHost {}
111+
// The pinned-memory bitmap allocator qualifies as a host allocator.
impl HostAllocator for GlobalStaticHost {}
44112
// The standard global allocator also qualifies as a host allocator.
impl HostAllocator for std::alloc::Global {}

0 commit comments

Comments
 (0)