-
Notifications
You must be signed in to change notification settings - Fork 4
feat: Exponential read-ahead cache #140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
kylebarron
wants to merge
10
commits into
main
Choose a base branch
from
kyle/exponential-cache
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+348
−115
Open
Changes from 3 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
b6235b9
feat: Exponential cache
kylebarron 12b0ebf
cleanup
kylebarron f109f22
add tokio sync dependency
kylebarron 43524f9
Replace prefetch buffer
kylebarron e69626e
make infallible
kylebarron 00a6e72
Add unit test for readahead cache
kylebarron 73adace
Update python bindings
kylebarron 6d7cb00
minimal Python error handling
kylebarron e897ba7
Update docs
kylebarron b8c3ffa
rename to cache
kylebarron File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| //! Caching strategies for metadata fetching. | ||
|
|
||
| use std::ops::Range; | ||
| use std::sync::Arc; | ||
|
|
||
| use bytes::{Bytes, BytesMut}; | ||
| use futures::future::BoxFuture; | ||
| use tokio::sync::Mutex; | ||
|
|
||
| use crate::error::AsyncTiffResult; | ||
| use crate::metadata::MetadataFetch; | ||
|
|
||
/// Logic for managing a cache of sequential buffers
///
/// Conceptually a single growable byte region covering the file from offset 0,
/// stored as a list of chunks so that appending new data never copies what is
/// already cached.
struct SequentialCache {
    /// Contiguous blocks from offset 0
    ///
    /// # Invariant
    /// - Buffers are contiguous from offset 0
    buffers: Vec<Bytes>,

    /// Total length cached (== sum of buffers lengths)
    len: u64,
}
|
|
||
| impl SequentialCache { | ||
| /// Create a new, empty SequentialCache | ||
| fn new() -> Self { | ||
| Self { | ||
| buffers: vec![], | ||
| len: 0, | ||
| } | ||
| } | ||
|
|
||
| /// Check if the given range is fully contained within the cached buffers | ||
| fn contains(&self, range: Range<u64>) -> bool { | ||
| range.end <= self.len | ||
| } | ||
|
|
||
| /// Slice out the given range from the cached buffers | ||
| fn slice(&self, range: Range<u64>) -> Bytes { | ||
| let out_len = (range.end - range.start) as usize; | ||
| // guaranteed valid | ||
| let mut remaining = range; | ||
| let mut out_buffers: Vec<Bytes> = vec![]; | ||
|
|
||
| for b in &self.buffers { | ||
| let b_len = b.len() as u64; | ||
|
|
||
| // this block falls entirely before the desired range start | ||
| if remaining.start >= b_len { | ||
| remaining.start -= b_len; | ||
| remaining.end -= b_len; | ||
| continue; | ||
| } | ||
|
|
||
| // we slice bytes out of *this* block | ||
| let start = remaining.start as usize; | ||
| let end = (remaining.end - remaining.start).min(b_len - remaining.start) as usize; | ||
|
|
||
| let chunk = b.slice(start..end); | ||
| out_buffers.push(chunk); | ||
|
|
||
| // consumed some portion; update and potentially break | ||
| remaining.start = 0; | ||
| if remaining.end <= b_len { | ||
| break; | ||
| } | ||
| remaining.end -= b_len; | ||
| } | ||
|
|
||
| if out_buffers.len() == 1 { | ||
| out_buffers.into_iter().next().unwrap() | ||
| } else { | ||
| let mut out = BytesMut::with_capacity(out_len); | ||
| for b in out_buffers { | ||
| out.extend_from_slice(&b); | ||
| } | ||
| out.into() | ||
| } | ||
| } | ||
|
|
||
| fn append_buffer(&mut self, buffer: Bytes) { | ||
| self.len += buffer.len() as u64; | ||
| self.buffers.push(buffer); | ||
| } | ||
| } | ||
|
|
||
/// A MetadataFetch implementation that caches fetched data in exponentially growing chunks,
/// sequentially from the beginning of the file.
///
/// Each cache miss fetches at least enough new data to double the cached
/// prefix, so the number of underlying requests grows only logarithmically
/// with the amount of data read.
pub struct ExponentialMetadataCache<F: MetadataFetch> {
    /// The wrapped fetcher used to satisfy cache misses.
    inner: F,
    /// Shared prefix cache. A `tokio::sync::Mutex` so the guard can be held
    /// across the `.await` on the inner fetch.
    cache: Arc<Mutex<SequentialCache>>,
}
|
|
||
| impl<F: MetadataFetch> ExponentialMetadataCache<F> { | ||
| /// Create a new ExponentialMetadataCache wrapping the given MetadataFetch | ||
| pub fn new(inner: F) -> AsyncTiffResult<Self> { | ||
| Ok(Self { | ||
| inner, | ||
| cache: Arc::new(Mutex::new(SequentialCache::new())), | ||
| }) | ||
| } | ||
| } | ||
|
|
||
/// Compute the size of the next read-ahead fetch.
///
/// Starts at 64 KiB when the cache is empty and doubles the cached length
/// thereafter, giving O(log n) total fetches for n bytes read. Uses saturating
/// multiplication so a pathologically large `existing_len` cannot overflow
/// (which would panic in debug builds and silently wrap in release builds).
fn next_fetch_size(existing_len: u64) -> u64 {
    if existing_len == 0 {
        64 * 1024
    } else {
        existing_len.saturating_mul(2)
    }
}
|
|
||
impl<F: MetadataFetch + Send + Sync> MetadataFetch for ExponentialMetadataCache<F> {
    /// Fetch `range`, serving it from the cache when possible and otherwise
    /// extending the cached prefix with one exponentially-sized read-ahead
    /// request before slicing the result out.
    fn fetch(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
        let cache = self.cache.clone();

        Box::pin(async move {
            // Lock is held for the whole operation, so concurrent fetches are
            // fully serialized (the second caller sees the first one's data).
            let mut g = cache.lock().await;

            // First check if we already have the range cached
            if g.contains(range.start..range.end) {
                return Ok(g.slice(range));
            }

            // Compute the correct fetch range: at least the exponential
            // read-ahead size, but never less than what's needed to cover
            // `range.end`.
            let start_len = g.len;
            let needed = range.end.saturating_sub(start_len);
            let fetch_size = next_fetch_size(start_len).max(needed);
            let fetch_range = start_len..start_len + fetch_size;

            // Perform the fetch while holding mutex
            // (this is OK because the mutex is async)
            //
            // NOTE(review): this assumes `inner.fetch` returns exactly
            // `fetch_size` bytes; a short read would leave `range` only
            // partially cached and the final `slice` would misbehave — TODO
            // confirm the MetadataFetch contract guarantees full-length reads.
            let bytes = self.inner.fetch(fetch_range).await?;

            // Now append safely
            g.append_buffer(bytes);

            Ok(g.slice(range))
        })
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The slice here might return an empty chunk, and the .push would fail with "IOError(Error { kind: UnexpectedEof, message: "failed to fill whole buffer" })". Maybe re-check the logic here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you think of an example of a unit test that would catch this?