Skip to content

Commit b6235b9

Browse files
committed
feat: Exponential cache
1 parent d41d111 commit b6235b9

File tree

3 files changed

+151
-0
lines changed

3 files changed

+151
-0
lines changed

python/Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/metadata/cache.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
//! Caching strategies for metadata fetching.
2+
3+
use std::ops::Range;
4+
use std::sync::Arc;
5+
6+
use bytes::{Bytes, BytesMut};
7+
use futures::future::BoxFuture;
8+
use tokio::sync::Mutex;
9+
10+
use crate::error::AsyncTiffResult;
11+
use crate::metadata::MetadataFetch;
12+
13+
/// Logic for managing a cache of sequential buffers
14+
struct SequentialCache {
15+
/// Contiguous blocks from offset 0
16+
///
17+
/// # Invariant
18+
/// - Buffers are contiguous from offset 0
19+
buffers: Vec<Bytes>,
20+
21+
/// Total length cached (== sum of buffers lengths)
22+
len: u64,
23+
}
24+
25+
impl SequentialCache {
26+
/// Create a new, empty SequentialCache
27+
fn new() -> Self {
28+
Self {
29+
buffers: vec![],
30+
len: 0,
31+
}
32+
}
33+
34+
/// Check if the given range is fully contained within the cached buffers
35+
fn contains(&self, range: Range<u64>) -> bool {
36+
range.end <= self.len
37+
}
38+
39+
/// Slice out the given range from the cached buffers
40+
fn slice(&self, range: Range<u64>) -> Bytes {
41+
let out_len = (range.end - range.start) as usize;
42+
// guaranteed valid
43+
let mut remaining = range;
44+
let mut out_buffers: Vec<Bytes> = vec![];
45+
46+
for b in &self.buffers {
47+
let b_len = b.len() as u64;
48+
49+
// this block falls entirely before the desired range start
50+
if remaining.start >= b_len {
51+
remaining.start -= b_len;
52+
remaining.end -= b_len;
53+
continue;
54+
}
55+
56+
// we slice bytes out of *this* block
57+
let start = remaining.start as usize;
58+
let end = (remaining.end - remaining.start).min(b_len - remaining.start) as usize;
59+
60+
let chunk = b.slice(start..end);
61+
out_buffers.push(chunk);
62+
63+
// consumed some portion; update and potentially break
64+
remaining.start = 0;
65+
if remaining.end <= b_len {
66+
break;
67+
}
68+
remaining.end -= b_len;
69+
}
70+
71+
if out_buffers.len() == 1 {
72+
out_buffers.into_iter().next().unwrap()
73+
} else {
74+
let mut out = BytesMut::with_capacity(out_len);
75+
for b in out_buffers {
76+
out.extend_from_slice(&b);
77+
}
78+
out.into()
79+
}
80+
}
81+
82+
fn append_buffer(&mut self, buffer: Bytes) {
83+
self.len += buffer.len() as u64;
84+
self.buffers.push(buffer);
85+
}
86+
}
87+
88+
/// A MetadataFetch implementation that caches fetched data in exponentially growing chunks,
89+
/// sequentially from the beginning of the file.
90+
pub struct ExponentialMetadataCache<F: MetadataFetch> {
91+
fetch: Arc<F>,
92+
cache: Arc<Mutex<SequentialCache>>,
93+
}
94+
95+
impl<F: MetadataFetch> ExponentialMetadataCache<F> {
96+
/// Create a new ExponentialMetadataCache wrapping the given MetadataFetch
97+
pub fn new(fetch: F) -> AsyncTiffResult<Self> {
98+
Ok(Self {
99+
fetch: Arc::new(fetch),
100+
cache: Arc::new(Mutex::new(SequentialCache::new())),
101+
})
102+
}
103+
}
104+
105+
/// Compute how many bytes to request next, given how many are already cached.
///
/// Returns 64 KiB when nothing is cached yet; otherwise returns twice the
/// cached length. The doubling saturates at `u64::MAX` instead of overflowing.
fn next_fetch_size(existing_len: u64) -> u64 {
    /// Size of the very first fetch, in bytes.
    const MIN_FETCH_SIZE: u64 = 64 * 1024;

    if existing_len == 0 {
        MIN_FETCH_SIZE
    } else {
        existing_len.saturating_mul(2)
    }
}
112+
113+
impl<F: MetadataFetch + Send + Sync> MetadataFetch for ExponentialMetadataCache<F> {
114+
fn fetch(&self, range: Range<u64>) -> BoxFuture<'_, AsyncTiffResult<Bytes>> {
115+
let inner = self.fetch.clone();
116+
let cache = self.cache.clone();
117+
118+
Box::pin(async move {
119+
let mut g = cache.lock().await;
120+
121+
// First check if we already have the range cached
122+
if g.contains(range.start..range.end) {
123+
return Ok(g.slice(range));
124+
}
125+
126+
// Compute the correct fetch range
127+
let start_len = g.len;
128+
let needed = range.end.saturating_sub(start_len);
129+
let fetch_size = next_fetch_size(start_len).max(needed);
130+
let fetch_range = start_len..start_len + fetch_size;
131+
132+
// Perform the fetch while holding mutex
133+
// (this is OK because the mutex is async)
134+
let bytes = inner.fetch(fetch_range).await?;
135+
136+
// Now append safely
137+
g.append_buffer(bytes);
138+
139+
Ok(g.slice(range))
140+
})
141+
}
142+
}

src/metadata/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@
5858
//! fetches the first `N` bytes out of a file.
5959
//!
6060
61+
pub mod cache;
6162
mod fetch;
6263
mod reader;
6364

65+
pub use cache::ExponentialMetadataCache;
6466
pub use fetch::{MetadataFetch, PrefetchBuffer};
6567
pub use reader::{ImageFileDirectoryReader, TiffMetadataReader};

0 commit comments

Comments
 (0)