diff --git a/python/src/reader.rs b/python/src/reader.rs index 73580d4..31c49b4 100644 --- a/python/src/reader.rs +++ b/python/src/reader.rs @@ -115,11 +115,15 @@ struct ObspecReader { } impl AsyncFileReader for ObspecReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.backend.get_range_wrapper(&self.path, range).boxed() } - fn get_byte_ranges( + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.backend.get_range_wrapper(&self.path, range).boxed() + } + + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { diff --git a/src/ifd.rs b/src/ifd.rs index d522826..90f399f 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -779,7 +779,7 @@ impl ImageFileDirectory { let range = self .get_tile_byte_range(x, y) .ok_or(AsyncTiffError::General("Not a tiled TIFF".to_string()))?; - let compressed_bytes = reader.get_bytes(range).await?; + let compressed_bytes = reader.get_image_bytes(range).await?; Ok(Tile { x, y, @@ -809,8 +809,8 @@ impl ImageFileDirectory { }) .collect::>>()?; - // 2: Fetch using `get_ranges - let buffers = reader.get_byte_ranges(byte_ranges).await?; + // 2: Fetch using `get_image_byte_ranges` + let buffers = reader.get_image_byte_ranges(byte_ranges).await?; // 3: Create tile objects let mut tiles = vec![]; diff --git a/src/reader.rs b/src/reader.rs index 3a58ddb..6c3dcb5 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -20,22 +20,30 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; /// /// Notes: /// -/// 1. There is a default implementation for types that implement [`tokio::io::AsyncRead`] -/// and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`]. +/// 1. There are distinct methods for accessing "metadata bytes" and "image bytes". The requests for +/// "metadata bytes" from `get_metadata_bytes` will be made from `TIFF.open`, while parsing +/// IFDs. 
Requests for "image bytes" from `get_image_bytes` and `get_image_byte_ranges` will be +/// made while fetching data from TIFF tiles or strips. /// /// 2. [`ObjectReader`], available when the `object_store` crate feature /// is enabled, implements this interface for [`ObjectStore`]. /// +/// 3. You can use [`TokioReader`] to implement [`AsyncFileReader`] for types that implement +/// [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`]. +/// /// [`ObjectStore`]: object_store::ObjectStore /// /// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html pub trait AsyncFileReader: Debug + Send + Sync { - /// Retrieve the bytes in `range` - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + /// Retrieve the bytes in `range` as part of a request for header metadata. + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + + /// Retrieve the bytes in `range` as part of a request for image data, not header metadata. + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; - /// Retrieve multiple byte ranges. The default implementation will call `get_bytes` - /// sequentially - fn get_byte_ranges( + /// Retrieve multiple byte ranges as part of a request for image data, not header metadata. 
The + /// default implementation will call `get_image_bytes` sequentially + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { @@ -43,7 +51,7 @@ pub trait AsyncFileReader: Debug + Send + Sync { let mut result = Vec::with_capacity(ranges.len()); for range in ranges.into_iter() { - let data = self.get_bytes(range).await?; + let data = self.get_image_bytes(range).await?; result.push(data); } @@ -55,15 +63,19 @@ pub trait AsyncFileReader: Debug + Send + Sync { /// This allows Box to be used as an AsyncFileReader, impl AsyncFileReader for Box { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.as_ref().get_bytes(range) + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_metadata_bytes(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_image_bytes(range) } - fn get_byte_ranges( + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { - self.as_ref().get_byte_ranges(ranges) + self.as_ref().get_image_byte_ranges(ranges) } } @@ -89,31 +101,36 @@ impl Toki pub fn new(inner: T) -> Self { Self(tokio::sync::Mutex::new(inner)) } -} -#[cfg(feature = "tokio")] -impl AsyncFileReader - for TokioReader -{ - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { use std::io::SeekFrom; use tokio::io::{AsyncReadExt, AsyncSeekExt}; - async move { - let mut file = self.0.lock().await; - - file.seek(SeekFrom::Start(range.start)).await?; + let mut file = self.0.lock().await; - let to_read = range.end - range.start; - let mut buffer = Vec::with_capacity(to_read as usize); - let read = file.read(&mut buffer).await? 
as u64; - if read != to_read { - return Err(AsyncTiffError::EndOfFile(to_read, read)); - } + file.seek(SeekFrom::Start(range.start)).await?; - Ok(buffer.into()) + let to_read = range.end - range.start; + let mut buffer = Vec::with_capacity(to_read as usize); + let read = file.read(&mut buffer).await? as u64; + if read != to_read { + return Err(AsyncTiffError::EndOfFile(to_read, read)); } - .boxed() + + Ok(buffer.into()) + } +} + +#[cfg(feature = "tokio")] +impl AsyncFileReader + for TokioReader +{ + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } } @@ -133,19 +150,30 @@ impl ObjectReader { pub fn new(store: Arc, path: object_store::path::Path) -> Self { Self { store, path } } -} -#[cfg(feature = "object_store")] -impl AsyncFileReader for ObjectReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { let range = range.start as _..range.end as _; self.store .get_range(&self.path, range) .map_err(|e| e.into()) - .boxed() + .await + } +} + +#[cfg(feature = "object_store")] +impl AsyncFileReader for ObjectReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { @@ -177,11 +205,8 @@ impl ReqwestReader { pub fn new(client: reqwest::Client, url: reqwest::Url) -> Self { Self { client, url } } -} -#[cfg(feature = "reqwest")] -impl AsyncFileReader for ReqwestReader { - fn get_bytes(&self, range: Range) 
-> BoxFuture<'_, AsyncTiffResult> { + fn make_range_request(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { let url = self.url.clone(); let client = self.client.clone(); // HTTP range is inclusive, so we need to subtract 1 from the end @@ -200,6 +225,17 @@ impl AsyncFileReader for ReqwestReader { } } +#[cfg(feature = "reqwest")] +impl AsyncFileReader for ReqwestReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } +} + /// An AsyncFileReader that caches the first `prefetch` bytes of a file. #[derive(Debug)] pub struct PrefetchReader { @@ -210,13 +246,13 @@ pub struct PrefetchReader { impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { - let buffer = reader.get_bytes(0..prefetch).await?; + let buffer = reader.get_metadata_bytes(0..prefetch).await?; Ok(Self { reader, buffer }) } } impl AsyncFileReader for PrefetchReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { if range.start < self.buffer.len() as _ { if range.end < self.buffer.len() as _ { let usize_range = range.start as usize..range.end as usize; @@ -224,20 +260,29 @@ impl AsyncFileReader for PrefetchReader { async { Ok(result) }.boxed() } else { // TODO: reuse partial internal buffer - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } else { - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + // In practice, get_image_bytes is only used for fetching tiles, which are unlikely + // to 
overlap a metadata prefetch. + self.reader.get_image_bytes(range) + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { - // In practice, get_byte_ranges is only used for fetching tiles, which are unlikely to - // overlap a metadata prefetch. - self.reader.get_byte_ranges(ranges) + // In practice, get_image_byte_ranges is only used for fetching tiles, which are unlikely + // to overlap a metadata prefetch. + self.reader.get_image_byte_ranges(ranges) } } @@ -298,7 +343,7 @@ impl AsyncCursor { pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult { let range = self.offset as _..(self.offset + length) as _; self.offset += length; - let bytes = self.reader.get_bytes(range).await?; + let bytes = self.reader.get_metadata_bytes(range).await?; Ok(EndianAwareReader { reader: bytes.reader(), endianness: self.endianness,