From 3e4b0ba0c7a1911f310b0773547e1f325582a2ab Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 24 Mar 2025 18:37:20 -0400 Subject: [PATCH 1/3] Split traits to get image bytes and metadata bytes --- src/ifd.rs | 4 +- src/reader.rs | 134 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index d522826..913eb39 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -779,7 +779,7 @@ impl ImageFileDirectory { let range = self .get_tile_byte_range(x, y) .ok_or(AsyncTiffError::General("Not a tiled TIFF".to_string()))?; - let compressed_bytes = reader.get_bytes(range).await?; + let compressed_bytes = reader.get_image_bytes(range).await?; Ok(Tile { x, y, @@ -810,7 +810,7 @@ impl ImageFileDirectory { .collect::>>()?; // 2: Fetch using `get_ranges - let buffers = reader.get_byte_ranges(byte_ranges).await?; + let buffers = reader.get_image_byte_ranges(byte_ranges).await?; // 3: Create tile objects let mut tiles = vec![]; diff --git a/src/reader.rs b/src/reader.rs index 1757dad..ef96684 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -30,12 +30,15 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; /// /// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html pub trait AsyncFileReader: Debug + Send + Sync { - /// Retrieve the bytes in `range` - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + /// Retrieve the bytes in `range` as part of a request for header metadata. + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; - /// Retrieve multiple byte ranges. The default implementation will call `get_bytes` - /// sequentially - fn get_byte_ranges( + /// Retrieve the bytes in `range` as part of a request for image data, not header metadata. + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + + /// Retrieve multiple byte ranges as part of a request for image data, not header metadata. 
The + /// default implementation will call `get_image_bytes` sequentially + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { @@ -43,7 +46,7 @@ pub trait AsyncFileReader: Debug + Send + Sync { let mut result = Vec::with_capacity(ranges.len()); for range in ranges.into_iter() { - let data = self.get_bytes(range).await?; + let data = self.get_image_bytes(range).await?; result.push(data); } @@ -55,15 +58,19 @@ pub trait AsyncFileReader: Debug + Send + Sync { /// This allows Box to be used as an AsyncFileReader, impl AsyncFileReader for Box { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.as_ref().get_bytes(range) + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_metadata_bytes(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_image_bytes(range) } - fn get_byte_ranges( + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { - self.as_ref().get_byte_ranges(ranges) + self.as_ref().get_image_byte_ranges(ranges) } } @@ -89,31 +96,36 @@ impl Toki pub fn new(inner: T) -> Self { Self(tokio::sync::Mutex::new(inner)) } -} -#[cfg(feature = "tokio")] -impl AsyncFileReader - for TokioReader -{ - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { use std::io::SeekFrom; use tokio::io::{AsyncReadExt, AsyncSeekExt}; - async move { - let mut file = self.0.lock().await; - - file.seek(SeekFrom::Start(range.start)).await?; + let mut file = self.0.lock().await; - let to_read = range.end - range.start; - let mut buffer = Vec::with_capacity(to_read as usize); - let read = file.read(&mut buffer).await? 
as u64; - if read != to_read { - return Err(AsyncTiffError::EndOfFile(to_read, read)); - } + file.seek(SeekFrom::Start(range.start)).await?; - Ok(buffer.into()) + let to_read = range.end - range.start; + let mut buffer = Vec::with_capacity(to_read as usize); + let read = file.read(&mut buffer).await? as u64; + if read != to_read { + return Err(AsyncTiffError::EndOfFile(to_read, read)); } - .boxed() + + Ok(buffer.into()) + } +} + +#[cfg(feature = "tokio")] +impl AsyncFileReader + for TokioReader +{ + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } } @@ -133,19 +145,30 @@ impl ObjectReader { pub fn new(store: Arc, path: object_store::path::Path) -> Self { Self { store, path } } -} -#[cfg(feature = "object_store")] -impl AsyncFileReader for ObjectReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { let range = range.start as _..range.end as _; self.store .get_range(&self.path, range) .map_err(|e| e.into()) - .boxed() + .await + } +} + +#[cfg(feature = "object_store")] +impl AsyncFileReader for ObjectReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { @@ -177,11 +200,8 @@ impl ReqwestReader { pub fn new(client: reqwest::Client, url: reqwest::Url) -> Self { Self { client, url } } -} -#[cfg(feature = "reqwest")] -impl AsyncFileReader for ReqwestReader { - fn get_bytes(&self, range: Range) 
-> BoxFuture<'_, AsyncTiffResult> { + fn make_range_request(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { let url = self.url.clone(); let client = self.client.clone(); // HTTP range is inclusive, so we need to subtract 1 from the end @@ -195,6 +215,17 @@ impl AsyncFileReader for ReqwestReader { } } +#[cfg(feature = "reqwest")] +impl AsyncFileReader for ReqwestReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } +} + /// An AsyncFileReader that caches the first `prefetch` bytes of a file. #[derive(Debug)] pub struct PrefetchReader { @@ -205,13 +236,13 @@ pub struct PrefetchReader { impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { - let buffer = reader.get_bytes(0..prefetch).await?; + let buffer = reader.get_metadata_bytes(0..prefetch).await?; Ok(Self { reader, buffer }) } } impl AsyncFileReader for PrefetchReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { if range.start < self.buffer.len() as _ { if range.end < self.buffer.len() as _ { let usize_range = range.start as usize..range.end as usize; @@ -219,20 +250,29 @@ impl AsyncFileReader for PrefetchReader { async { Ok(result) }.boxed() } else { // TODO: reuse partial internal buffer - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } else { - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + // In practice, get_image_bytes is only used for fetching tiles, which are unlikely + // to 
overlap a metadata prefetch. + self.reader.get_image_bytes(range) + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { - // In practice, get_byte_ranges is only used for fetching tiles, which are unlikely to - // overlap a metadata prefetch. - self.reader.get_byte_ranges(ranges) + // In practice, get_image_byte_ranges is only used for fetching tiles, which are unlikely + // to overlap a metadata prefetch. + self.reader.get_image_byte_ranges(ranges) } } @@ -293,7 +333,7 @@ impl AsyncCursor { pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult { let range = self.offset as _..(self.offset + length) as _; self.offset += length; - let bytes = self.reader.get_bytes(range).await?; + let bytes = self.reader.get_metadata_bytes(range).await?; Ok(EndianAwareReader { reader: bytes.reader(), endianness: self.endianness, From 0393bceb10876f62a982032e6d3705d839c787d3 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 24 Mar 2025 18:39:50 -0400 Subject: [PATCH 2/3] fix python compile --- python/src/reader.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/src/reader.rs b/python/src/reader.rs index 73580d4..31c49b4 100644 --- a/python/src/reader.rs +++ b/python/src/reader.rs @@ -115,11 +115,15 @@ struct ObspecReader { } impl AsyncFileReader for ObspecReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.backend.get_range_wrapper(&self.path, range).boxed() } - fn get_byte_ranges( + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.backend.get_range_wrapper(&self.path, range).boxed() + } + + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { From a60a3599bee0f7b65960251048840301030abda0 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 26 Mar 2025 10:50:12 -0400 Subject: [PATCH 3/3] 
Update docs --- src/ifd.rs | 2 +- src/reader.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index 913eb39..90f399f 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -809,7 +809,7 @@ impl ImageFileDirectory { }) .collect::>>()?; - // 2: Fetch using `get_ranges + // 2: Fetch using `get_image_byte_ranges` let buffers = reader.get_image_byte_ranges(byte_ranges).await?; // 3: Create tile objects diff --git a/src/reader.rs b/src/reader.rs index ef96684..b44beb3 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -20,12 +20,17 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; /// /// Notes: /// -/// 1. There is a default implementation for types that implement [`tokio::io::AsyncRead`] -/// and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`]. +/// 1. There are distinct trait methods for accessing "metadata bytes" and "image bytes". +/// Requests for "metadata bytes" via `get_metadata_bytes` are made from `TIFF.open`, while parsing +/// IFDs. Requests for "image bytes" via `get_image_bytes` and `get_image_byte_ranges` are made +/// while fetching data from TIFF tiles or strips. /// /// 2. [`ObjectReader`], available when the `object_store` crate feature /// is enabled, implements this interface for [`ObjectStore`]. /// +/// 3. You can use [`TokioReader`] to implement [`AsyncFileReader`] for types that implement +/// [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`], for example [`tokio::fs::File`]. +/// /// [`ObjectStore`]: object_store::ObjectStore /// /// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html