From 3e4b0ba0c7a1911f310b0773547e1f325582a2ab Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 24 Mar 2025 18:37:20 -0400 Subject: [PATCH 01/11] Split traits to get image bytes and metadata bytes --- src/ifd.rs | 4 +- src/reader.rs | 134 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index d522826..913eb39 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -779,7 +779,7 @@ impl ImageFileDirectory { let range = self .get_tile_byte_range(x, y) .ok_or(AsyncTiffError::General("Not a tiled TIFF".to_string()))?; - let compressed_bytes = reader.get_bytes(range).await?; + let compressed_bytes = reader.get_image_bytes(range).await?; Ok(Tile { x, y, @@ -810,7 +810,7 @@ impl ImageFileDirectory { .collect::>>()?; // 2: Fetch using `get_ranges - let buffers = reader.get_byte_ranges(byte_ranges).await?; + let buffers = reader.get_image_byte_ranges(byte_ranges).await?; // 3: Create tile objects let mut tiles = vec![]; diff --git a/src/reader.rs b/src/reader.rs index 1757dad..ef96684 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -30,12 +30,15 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; /// /// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html pub trait AsyncFileReader: Debug + Send + Sync { - /// Retrieve the bytes in `range` - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + /// Retrieve the bytes in `range` as part of a request for header metadata. + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; - /// Retrieve multiple byte ranges. The default implementation will call `get_bytes` - /// sequentially - fn get_byte_ranges( + /// Retrieve the bytes in `range` as part of a request for image data, not header metadata. + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult>; + + /// Retrieve multiple byte ranges as part of a request for image data, not header metadata. The + /// default implementation will call `get_image_bytes` sequentially + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { @@ -43,7 +46,7 @@ pub trait AsyncFileReader: Debug + Send + Sync { let mut result = Vec::with_capacity(ranges.len()); for range in ranges.into_iter() { - let data = self.get_bytes(range).await?; + let data = self.get_image_bytes(range).await?; result.push(data); } @@ -55,15 +58,19 @@ pub trait AsyncFileReader: Debug + Send + Sync { /// This allows Box to be used as an AsyncFileReader, impl AsyncFileReader for Box { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - self.as_ref().get_bytes(range) + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_metadata_bytes(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.as_ref().get_image_bytes(range) } - fn get_byte_ranges( + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { - self.as_ref().get_byte_ranges(ranges) + self.as_ref().get_image_byte_ranges(ranges) } } @@ -89,31 +96,36 @@ impl Toki pub fn new(inner: T) -> Self { Self(tokio::sync::Mutex::new(inner)) } -} -#[cfg(feature = "tokio")] -impl AsyncFileReader - for TokioReader -{ - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { use std::io::SeekFrom; use tokio::io::{AsyncReadExt, AsyncSeekExt}; - async move { - let mut file = self.0.lock().await; - - file.seek(SeekFrom::Start(range.start)).await?; + let mut file = self.0.lock().await; - let to_read = range.end - range.start; - let mut buffer = Vec::with_capacity(to_read as usize); - let read = file.read(&mut buffer).await? as u64; - if read != to_read { - return Err(AsyncTiffError::EndOfFile(to_read, read)); - } + file.seek(SeekFrom::Start(range.start)).await?; - Ok(buffer.into()) + let to_read = range.end - range.start; + let mut buffer = Vec::with_capacity(to_read as usize); + let read = file.read(&mut buffer).await? as u64; + if read != to_read { + return Err(AsyncTiffError::EndOfFile(to_read, read)); } - .boxed() + + Ok(buffer.into()) + } +} + +#[cfg(feature = "tokio")] +impl AsyncFileReader + for TokioReader +{ + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } } @@ -133,19 +145,30 @@ impl ObjectReader { pub fn new(store: Arc, path: object_store::path::Path) -> Self { Self { store, path } } -} -#[cfg(feature = "object_store")] -impl AsyncFileReader for ObjectReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + async fn make_range_request(&self, range: Range) -> AsyncTiffResult { let range = range.start as _..range.end as _; self.store .get_range(&self.path, range) .map_err(|e| e.into()) - .boxed() + .await + } +} + +#[cfg(feature = "object_store")] +impl AsyncFileReader for ObjectReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range).boxed() + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { @@ -177,11 +200,8 @@ impl ReqwestReader { pub fn new(client: reqwest::Client, url: reqwest::Url) -> Self { Self { client, url } } -} -#[cfg(feature = "reqwest")] -impl AsyncFileReader for ReqwestReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn make_range_request(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { let url = self.url.clone(); let client = self.client.clone(); // HTTP range is inclusive, so we need to subtract 1 from the end @@ -195,6 +215,17 @@ impl AsyncFileReader for ReqwestReader { } } +#[cfg(feature = "reqwest")] +impl AsyncFileReader for ReqwestReader { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } + + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.make_range_request(range) + } +} + /// An AsyncFileReader that caches the first `prefetch` bytes of a file. #[derive(Debug)] pub struct PrefetchReader { @@ -205,13 +236,13 @@ pub struct PrefetchReader { impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { - let buffer = reader.get_bytes(0..prefetch).await?; + let buffer = reader.get_metadata_bytes(0..prefetch).await?; Ok(Self { reader, buffer }) } } impl AsyncFileReader for PrefetchReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { if range.start < self.buffer.len() as _ { if range.end < self.buffer.len() as _ { let usize_range = range.start as usize..range.end as usize; @@ -219,20 +250,29 @@ impl AsyncFileReader for PrefetchReader { async { Ok(result) }.boxed() } else { // TODO: reuse partial internal buffer - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } else { - self.reader.get_bytes(range) + self.reader.get_metadata_bytes(range) } } - fn get_byte_ranges(&self, ranges: Vec>) -> BoxFuture<'_, AsyncTiffResult>> + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + // In practice, get_image_bytes is only used for fetching tiles, which are unlikely + // to overlap a metadata prefetch. + self.reader.get_image_bytes(range) + } + + fn get_image_byte_ranges( + &self, + ranges: Vec>, + ) -> BoxFuture<'_, AsyncTiffResult>> where Self: Send, { - // In practice, get_byte_ranges is only used for fetching tiles, which are unlikely to - // overlap a metadata prefetch. - self.reader.get_byte_ranges(ranges) + // In practice, get_image_byte_ranges is only used for fetching tiles, which are unlikely + // to overlap a metadata prefetch. + self.reader.get_image_byte_ranges(ranges) } } @@ -293,7 +333,7 @@ impl AsyncCursor { pub(crate) async fn read(&mut self, length: u64) -> AsyncTiffResult { let range = self.offset as _..(self.offset + length) as _; self.offset += length; - let bytes = self.reader.get_bytes(range).await?; + let bytes = self.reader.get_metadata_bytes(range).await?; Ok(EndianAwareReader { reader: bytes.reader(), endianness: self.endianness, From 0393bceb10876f62a982032e6d3705d839c787d3 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 24 Mar 2025 18:39:50 -0400 Subject: [PATCH 02/11] fix python compile --- python/src/reader.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/src/reader.rs b/python/src/reader.rs index 73580d4..31c49b4 100644 --- a/python/src/reader.rs +++ b/python/src/reader.rs @@ -115,11 +115,15 @@ struct ObspecReader { } impl AsyncFileReader for ObspecReader { - fn get_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { self.backend.get_range_wrapper(&self.path, range).boxed() } - fn get_byte_ranges( + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.backend.get_range_wrapper(&self.path, range).boxed() + } + + fn get_image_byte_ranges( &self, ranges: Vec>, ) -> BoxFuture<'_, AsyncTiffResult>> { From 34331967ae11f94c0f4e4165130ac63bcf83ca57 Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Sun, 23 Mar 2025 03:01:51 +0100 Subject: [PATCH 03/11] refactored tag reading so we first create an internal buffer on the data and use that --- src/ifd.rs | 307 +++++++++++------------------------------------ src/tiff/tags.rs | 16 +++ 2 files changed, 87 insertions(+), 236 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index 913eb39..a3daa60 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::io::Read; use std::ops::Range; -use bytes::Bytes; +use bytes::{buf::Buf, Bytes}; use num_enum::TryFromPrimitive; use crate::error::{AsyncTiffError, AsyncTiffResult}; @@ -878,56 +878,37 @@ async fn read_tag_value( return Ok(Value::List(vec![])); } - let tag_size = match tag_type { - Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, - Type::SHORT | Type::SSHORT => 2, - Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, - Type::LONG8 - | Type::SLONG8 - | Type::DOUBLE - | Type::RATIONAL - | Type::SRATIONAL - | Type::IFD8 => 8, - }; + let tag_size = tag_type.size(); let value_byte_length = count.checked_mul(tag_size).unwrap(); + // prefetch all tag data + let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 { + // value fits in offset field + cursor.read(value_byte_length).await? + } else { + // Seek cursor + let offset = if bigtiff { + cursor.read_u64().await? + } else { + cursor.read_u32().await?.into() + }; + cursor.seek(offset); + cursor.read(value_byte_length).await? + }; // Case 2: there is one value. if count == 1 { - // 2a: the value is 5-8 bytes and we're in BigTiff mode. - if bigtiff && value_byte_length > 4 && value_byte_length <= 8 { - let mut data = cursor.read(value_byte_length).await?; - - return Ok(match tag_type { - Type::LONG8 => Value::UnsignedBig(data.read_u64()?), - Type::SLONG8 => Value::SignedBig(data.read_i64()?), - Type::DOUBLE => Value::Double(data.read_f64()?), - Type::RATIONAL => Value::Rational(data.read_u32()?, data.read_u32()?), - Type::SRATIONAL => Value::SRational(data.read_i32()?, data.read_i32()?), - Type::IFD8 => Value::IfdBig(data.read_u64()?), - Type::BYTE - | Type::SBYTE - | Type::ASCII - | Type::UNDEFINED - | Type::SHORT - | Type::SSHORT - | Type::LONG - | Type::SLONG - | Type::FLOAT - | Type::IFD => unreachable!(), - }); - } - - // NOTE: we should only be reading value_byte_length when it's 4 bytes or fewer. Right now - // we're reading even if it's 8 bytes, but then only using the first 4 bytes of this - // buffer. - let mut data = cursor.read(value_byte_length).await?; - - // 2b: the value is at most 4 bytes or doesn't fit in the offset field. return Ok(match tag_type { + Type::LONG8 => Value::UnsignedBig(data.read_u64()?), + Type::SLONG8 => Value::SignedBig(data.read_i64()?), + Type::DOUBLE => Value::Double(data.read_f64()?), + Type::RATIONAL => Value::Rational(data.read_u32()?, data.read_u32()?), + Type::SRATIONAL => Value::SRational(data.read_i32()?, data.read_i32()?), + Type::IFD8 => Value::IfdBig(data.read_u64()?), Type::BYTE | Type::UNDEFINED => Value::Byte(data.read_u8()?), Type::SBYTE => Value::Signed(data.read_i8()? as i32), Type::SHORT => Value::Short(data.read_u16()?), + Type::IFD => Value::Ifd(data.read_u32()?), Type::SSHORT => Value::Signed(data.read_i16()? as i32), Type::LONG => Value::Unsigned(data.read_u32()?), Type::SLONG => Value::Signed(data.read_i32()?), @@ -940,266 +921,120 @@ async fn read_tag_value( // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); } } - Type::LONG8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::UnsignedBig(cursor.read_u64().await?) - } - Type::SLONG8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::SignedBig(cursor.read_i64().await?) - } - Type::DOUBLE => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::Double(cursor.read_f64().await?) - } - Type::RATIONAL => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - let numerator = cursor.read_u32().await?; - let denominator = cursor.read_u32().await?; - Value::Rational(numerator, denominator) - } - Type::SRATIONAL => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - let numerator = cursor.read_i32().await?; - let denominator = cursor.read_i32().await?; - Value::SRational(numerator, denominator) - } - Type::IFD => Value::Ifd(data.read_u32()?), - Type::IFD8 => { - let offset = data.read_u32()?; - cursor.seek(offset as _); - Value::IfdBig(cursor.read_u64().await?) - } }); } - // Case 3: There is more than one value, but it fits in the offset field. - if value_byte_length <= 4 || bigtiff && value_byte_length <= 8 { - let mut data = cursor.read(value_byte_length).await?; - if bigtiff { - cursor.advance(8 - value_byte_length); - } else { - cursor.advance(4 - value_byte_length); - } - - match tag_type { - Type::BYTE | Type::UNDEFINED => { - return { - Ok(Value::List( - (0..count) - .map(|_| Value::Byte(data.read_u8().unwrap())) - .collect(), - )) - }; - } - Type::SBYTE => { - return { - Ok(Value::List( - (0..count) - .map(|_| Value::Signed(data.read_i8().unwrap() as i32)) - .collect(), - )) - } - } - Type::ASCII => { - let mut buf = vec![0; count as usize]; - data.read_exact(&mut buf)?; - if buf.is_ascii() && buf.ends_with(&[0]) { - let v = std::str::from_utf8(&buf) - .map_err(|err| AsyncTiffError::General(err.to_string()))?; - let v = v.trim_matches(char::from(0)); - return Ok(Value::Ascii(v.into())); - } else { - panic!("Invalid tag"); - // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); - } - } - Type::SHORT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Short(data.read_u16()?)); - } - return Ok(Value::List(v)); - } - Type::SSHORT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Signed(i32::from(data.read_i16()?))); - } - return Ok(Value::List(v)); - } - Type::LONG => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Unsigned(data.read_u32()?)); - } - return Ok(Value::List(v)); - } - Type::SLONG => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Signed(data.read_i32()?)); - } - return Ok(Value::List(v)); - } - Type::FLOAT => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Float(data.read_f32()?)); - } - return Ok(Value::List(v)); - } - Type::IFD => { - let mut v = Vec::new(); - for _ in 0..count { - v.push(Value::Ifd(data.read_u32()?)); - } - return Ok(Value::List(v)); - } - Type::LONG8 - | Type::SLONG8 - | Type::RATIONAL - | Type::SRATIONAL - | Type::DOUBLE - | Type::IFD8 => { - unreachable!() - } - } - } - - // Seek cursor - let offset = if bigtiff { - cursor.read_u64().await? - } else { - cursor.read_u32().await?.into() - }; - cursor.seek(offset); - - // Case 4: there is more than one value, and it doesn't fit in the offset field. match tag_type { - // TODO check if this could give wrong results - // at a different endianess of file/computer. Type::BYTE | Type::UNDEFINED => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Byte(cursor.read_u8().await?)) + v.push(Value::Byte(data.read_u8()?)); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::SBYTE => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Signed(cursor.read_i8().await? as i32)) + v.push(Value::SignedByte(data.read_i8()?)); + } + return Ok(Value::List(v)); + } + Type::ASCII => { + let mut buf = vec![0; count as usize]; + data.read_exact(&mut buf)?; + if buf.is_ascii() && buf.ends_with(&[0]) { + let v = std::str::from_utf8(&buf) + .map_err(|err| AsyncTiffError::General(err.to_string()))?; + let v = v.trim_matches(char::from(0)); + return Ok(Value::Ascii(v.into())); + } else { + panic!("Invalid tag"); + // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); } - Ok(Value::List(v)) } Type::SHORT => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Short(cursor.read_u16().await?)) + v.push(Value::Short(data.read_u16()?)); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::SSHORT => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Signed(cursor.read_i16().await? as i32)) + v.push(Value::Signed(i32::from(data.read_i16()?))); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::LONG => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Unsigned(cursor.read_u32().await?)) + v.push(Value::Unsigned(data.read_u32()?)); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::SLONG => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Signed(cursor.read_i32().await?)) + v.push(Value::Signed(data.read_i32()?)); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::FLOAT => { - let mut v = Vec::with_capacity(count as _); + let mut v = Vec::new(); for _ in 0..count { - v.push(Value::Float(cursor.read_f32().await?)) + v.push(Value::Float(data.read_f32()?)); } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::DOUBLE => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::Double(cursor.read_f64().await?)) + v.push(Value::Double(data.read_f64()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::RATIONAL => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::Rational( - cursor.read_u32().await?, - cursor.read_u32().await?, - )) + v.push(Value::Rational(data.read_u32()?, data.read_u32()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::SRATIONAL => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::SRational( - cursor.read_i32().await?, - cursor.read_i32().await?, - )) + v.push(Value::SRational(data.read_i32()?, data.read_i32()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::LONG8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::UnsignedBig(cursor.read_u64().await?)) + v.push(Value::UnsignedBig(data.read_u64()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::SLONG8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::SignedBig(cursor.read_i64().await?)) + v.push(Value::SignedBig(data.read_i64()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::IFD => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::Ifd(cursor.read_u32().await?)) + v.push(Value::Ifd(data.read_u32()?)) } - Ok(Value::List(v)) + return Ok(Value::List(v)); } Type::IFD8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::IfdBig(cursor.read_u64().await?)) - } - Ok(Value::List(v)) - } - Type::ASCII => { - let mut out = vec![0; count as _]; - let mut buf = cursor.read(count).await?; - buf.read_exact(&mut out)?; - - // Strings may be null-terminated, so we trim anything downstream of the null byte - if let Some(first) = out.iter().position(|&b| b == 0) { - out.truncate(first); + v.push(Value::IfdBig(data.read_u64()?)) } - Ok(Value::Ascii( - String::from_utf8(out).map_err(|err| AsyncTiffError::General(err.to_string()))?, - )) + return Ok(Value::List(v)); } } } diff --git a/src/tiff/tags.rs b/src/tiff/tags.rs index 524a726..567f314 100644 --- a/src/tiff/tags.rs +++ b/src/tiff/tags.rs @@ -175,6 +175,22 @@ pub enum Type(u16) { } } +impl Type { + pub const fn size(&self) -> u64 { + match self { + Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, + Type::SHORT | Type::SSHORT => 2, + Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, + Type::LONG8 + | Type::SLONG8 + | Type::DOUBLE + | Type::RATIONAL + | Type::SRATIONAL + | Type::IFD8 => 8, + } + } +} + tags! { /// See [TIFF compression tags](https://www.awaresystems.be/imaging/tiff/tifftags/compression.html) /// for reference. From acd769685bf2f8abfb2d1e40e1816b2ef0242ee9 Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Sun, 23 Mar 2025 15:30:59 +0100 Subject: [PATCH 04/11] fixed cursor handling --- src/ifd.rs | 24 ++++++++++++++++-------- src/reader.rs | 3 +++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index a3daa60..d337c20 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -7,7 +7,7 @@ use num_enum::TryFromPrimitive; use crate::error::{AsyncTiffError, AsyncTiffResult}; use crate::geo::{GeoKeyDirectory, GeoKeyTag}; -use crate::reader::{AsyncCursor, AsyncFileReader}; +use crate::reader::{AsyncCursor, AsyncFileReader, EndianAwareReader}; use crate::tiff::tags::{ CompressionMethod, PhotometricInterpretation, PlanarConfiguration, Predictor, ResolutionUnit, SampleFormat, Tag, Type, @@ -839,7 +839,7 @@ impl ImageFileDirectory { /// Read a single tag from the cursor async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> { - let start_cursor_position = cursor.position(); + // let start_cursor_position = cursor.position(); let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?); @@ -855,9 +855,9 @@ async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(T let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?; - // TODO: better handle management of cursor state - let ifd_entry_size = if bigtiff { 20 } else { 12 }; - cursor.seek(start_cursor_position + ifd_entry_size); + // TODO: better handle management of cursor state <- should be done now + // let ifd_entry_size = if bigtiff { 20 } else { 12 }; + // cursor.seek(start_cursor_position + ifd_entry_size); Ok((tag_name, tag_value)) } @@ -885,7 +885,13 @@ async fn read_tag_value( // prefetch all tag data let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 { // value fits in offset field - cursor.read(value_byte_length).await? + let res = cursor.read(value_byte_length).await?; + if bigtiff { + cursor.advance(8-value_byte_length); + } else { + cursor.advance(4-value_byte_length); + } + res } else { // Seek cursor let offset = if bigtiff { @@ -893,8 +899,10 @@ async fn read_tag_value( } else { cursor.read_u32().await?.into() }; - cursor.seek(offset); - cursor.read(value_byte_length).await? + let reader = cursor.reader().get_bytes(offset..offset+value_byte_length).await?.reader(); + EndianAwareReader::new(reader, cursor.endianness()) + // cursor.seek(offset); + // cursor.read(value_byte_length).await? }; // Case 2: there is one value. if count == 1 { diff --git a/src/reader.rs b/src/reader.rs index ef96684..945307e 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -418,6 +418,9 @@ pub(crate) struct EndianAwareReader { } impl EndianAwareReader { + pub(crate) fn new(reader: Reader, endianness: Endianness) -> Self { + Self { reader, endianness } + } /// Read a u8 from the cursor, advancing the internal state by 1 byte. pub(crate) fn read_u8(&mut self) -> AsyncTiffResult { Ok(self.reader.read_u8()?) From b0bddcfd454ae94f3f0d4b73c52e491888c6fbe8 Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Sun, 23 Mar 2025 21:04:54 +0100 Subject: [PATCH 05/11] fixed clippy --- src/ifd.rs | 54 +++++++++++++++++++++++++++------------------------ src/reader.rs | 12 ++++++++++-- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index d337c20..7c630b6 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -887,9 +887,9 @@ async fn read_tag_value( // value fits in offset field let res = cursor.read(value_byte_length).await?; if bigtiff { - cursor.advance(8-value_byte_length); + cursor.advance(8 - value_byte_length); } else { - cursor.advance(4-value_byte_length); + cursor.advance(4 - value_byte_length); } res } else { @@ -899,7 +899,11 @@ async fn read_tag_value( } else { cursor.read_u32().await?.into() }; - let reader = cursor.reader().get_bytes(offset..offset+value_byte_length).await?.reader(); + let reader = cursor + .reader() + .get_bytes(offset..offset + value_byte_length) + .await? + .reader(); EndianAwareReader::new(reader, cursor.endianness()) // cursor.seek(offset); // cursor.read(value_byte_length).await? @@ -934,18 +938,18 @@ async fn read_tag_value( match tag_type { Type::BYTE | Type::UNDEFINED => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Byte(data.read_u8()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::SBYTE => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::SignedByte(data.read_i8()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::ASCII => { let mut buf = vec![0; count as usize]; @@ -954,95 +958,95 @@ async fn read_tag_value( let v = std::str::from_utf8(&buf) .map_err(|err| AsyncTiffError::General(err.to_string()))?; let v = v.trim_matches(char::from(0)); - return Ok(Value::Ascii(v.into())); + Ok(Value::Ascii(v.into())) } else { panic!("Invalid tag"); // return Err(TiffError::FormatError(TiffFormatError::InvalidTag)); } } Type::SHORT => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Short(data.read_u16()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::SSHORT => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Signed(i32::from(data.read_i16()?))); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::LONG => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Unsigned(data.read_u32()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::SLONG => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Signed(data.read_i32()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::FLOAT => { - let mut v = Vec::new(); + let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Float(data.read_f32()?)); } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::DOUBLE => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Double(data.read_f64()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::RATIONAL => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Rational(data.read_u32()?, data.read_u32()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::SRATIONAL => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::SRational(data.read_i32()?, data.read_i32()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::LONG8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::UnsignedBig(data.read_u64()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::SLONG8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::SignedBig(data.read_i64()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::IFD => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Ifd(data.read_u32()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } Type::IFD8 => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::IfdBig(data.read_u64()?)) } - return Ok(Value::List(v)); + Ok(Value::List(v)) } } } diff --git a/src/reader.rs b/src/reader.rs index 945307e..9a77997 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -341,31 +341,37 @@ impl AsyncCursor { } /// Read a u8 from the cursor, advancing the internal state by 1 byte. + #[allow(dead_code)] pub(crate) async fn read_u8(&mut self) -> AsyncTiffResult { self.read(1).await?.read_u8() } /// Read a i8 from the cursor, advancing the internal state by 1 byte. + #[allow(dead_code)] pub(crate) async fn read_i8(&mut self) -> AsyncTiffResult { self.read(1).await?.read_i8() } /// Read a u16 from the cursor, advancing the internal state by 2 bytes. + #[allow(dead_code)] pub(crate) async fn read_u16(&mut self) -> AsyncTiffResult { self.read(2).await?.read_u16() } /// Read a i16 from the cursor, advancing the internal state by 2 bytes. + #[allow(dead_code)] pub(crate) async fn read_i16(&mut self) -> AsyncTiffResult { self.read(2).await?.read_i16() } /// Read a u32 from the cursor, advancing the internal state by 4 bytes. + #[allow(dead_code)] pub(crate) async fn read_u32(&mut self) -> AsyncTiffResult { self.read(4).await?.read_u32() } /// Read a i32 from the cursor, advancing the internal state by 4 bytes. + #[allow(dead_code)] pub(crate) async fn read_i32(&mut self) -> AsyncTiffResult { self.read(4).await?.read_i32() } @@ -376,24 +382,25 @@ impl AsyncCursor { } /// Read a i64 from the cursor, advancing the internal state by 8 bytes. + #[allow(dead_code)] pub(crate) async fn read_i64(&mut self) -> AsyncTiffResult { self.read(8).await?.read_i64() } + #[allow(dead_code)] pub(crate) async fn read_f32(&mut self) -> AsyncTiffResult { self.read(4).await?.read_f32() } + #[allow(dead_code)] pub(crate) async fn read_f64(&mut self) -> AsyncTiffResult { self.read(8).await?.read_f64() } - #[allow(dead_code)] pub(crate) fn reader(&self) -> &Arc { &self.reader } - #[allow(dead_code)] pub(crate) fn endianness(&self) -> Endianness { self.endianness } @@ -407,6 +414,7 @@ impl AsyncCursor { self.offset = offset; } + #[allow(dead_code)] pub(crate) fn position(&self) -> u64 { self.offset } From feefaddeed46c5632250dbec853cbeb9152c45d2 Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Tue, 25 Mar 2025 19:48:48 +0100 Subject: [PATCH 06/11] small changes to decrease diff size --- src/ifd.rs | 22 ++++++++++++---------- src/tiff/tags.rs | 16 ---------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/ifd.rs b/src/ifd.rs index 7c630b6..979a259 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -839,8 +839,6 @@ impl ImageFileDirectory { /// Read a single tag from the cursor async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> { - // let start_cursor_position = cursor.position(); - let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?); let tag_type_code = cursor.read_u16().await?; @@ -855,10 +853,6 @@ async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(T let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?; - // TODO: better handle management of cursor state <- should be done now - // let ifd_entry_size = if bigtiff { 20 } else { 12 }; - // cursor.seek(start_cursor_position + ifd_entry_size); - Ok((tag_name, tag_value)) } @@ -878,7 +872,17 @@ async fn read_tag_value( return Ok(Value::List(vec![])); } - let tag_size = tag_type.size(); + let tag_size = match tag_type { + Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, + Type::SHORT | Type::SSHORT => 2, + Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, + Type::LONG8 + | Type::SLONG8 + | Type::DOUBLE + | Type::RATIONAL + | Type::SRATIONAL + | Type::IFD8 => 8, + }; let value_byte_length = count.checked_mul(tag_size).unwrap(); @@ -901,12 +905,10 @@ async fn read_tag_value( }; let reader = cursor .reader() - .get_bytes(offset..offset + value_byte_length) + .get_metadata_bytes(offset..offset + value_byte_length) .await? .reader(); EndianAwareReader::new(reader, cursor.endianness()) - // cursor.seek(offset); - // cursor.read(value_byte_length).await? }; // Case 2: there is one value. if count == 1 { diff --git a/src/tiff/tags.rs b/src/tiff/tags.rs index 567f314..524a726 100644 --- a/src/tiff/tags.rs +++ b/src/tiff/tags.rs @@ -175,22 +175,6 @@ pub enum Type(u16) { } } -impl Type { - pub const fn size(&self) -> u64 { - match self { - Type::BYTE | Type::SBYTE | Type::ASCII | Type::UNDEFINED => 1, - Type::SHORT | Type::SSHORT => 2, - Type::LONG | Type::SLONG | Type::FLOAT | Type::IFD => 4, - Type::LONG8 - | Type::SLONG8 - | Type::DOUBLE - | Type::RATIONAL - | Type::SRATIONAL - | Type::IFD8 => 8, - } - } -} - tags! { /// See [TIFF compression tags](https://www.awaresystems.be/imaging/tiff/tifftags/compression.html) /// for reference. From 4ca706bbe1b4b1a7366b2813e6c68fcee13dfe43 Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Tue, 25 Mar 2025 21:21:01 +0100 Subject: [PATCH 07/11] made SignedByte cast to Signed --- src/ifd.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ifd.rs b/src/ifd.rs index 979a259..f390342 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -949,7 +949,7 @@ async fn read_tag_value( Type::SBYTE => { let mut v = Vec::with_capacity(count as _); for _ in 0..count { - v.push(Value::SignedByte(data.read_i8()?)); + v.push(Value::Signed(data.read_i8()? as i32)); } Ok(Value::List(v)) } From ab1f6a147ad536ad77543346b520be728c30168f Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Tue, 25 Mar 2025 22:04:01 +0100 Subject: [PATCH 08/11] made prefetchreader also hold an internal cache --- Cargo.toml | 4 +++- src/reader.rs | 50 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e669dc3..0725e3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ description = "Low-level asynchronous TIFF reader." readme = "README.md" [dependencies] +async-mutex = { version = "1.4.0", optional = true } byteorder = "1" bytes = "1.7.0" flate2 = "1.0.20" @@ -35,9 +36,10 @@ tokio = { version = "1.9", features = [ ] } [features] -default = ["object_store", "reqwest"] +default = ["object_store", "reqwest", "async_mutex"] tokio = ["dep:tokio"] reqwest = ["dep:reqwest"] object_store = ["dep:object_store"] +async_mutex = ["dep:async-mutex"] [package.metadata.cargo-all-features] diff --git a/src/reader.rs b/src/reader.rs index 9a77997..f48addb 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -13,6 +13,13 @@ use futures::TryFutureExt; use crate::error::{AsyncTiffError, AsyncTiffResult}; +#[cfg(all(not(feature = "tokio"), feature = "async_mutex"))] +use async_mutex::Mutex; +#[cfg(feature = "tokio")] +use tokio::sync::Mutex; +#[cfg(not(any(feature="tokio", feature="async_mutex")))] +compile_error!("at least one of 'tokio' or 'async_mutex' features should be enabled"); + /// The asynchronous interface used to read COG files /// /// This was derived from the Parquet @@ -231,29 +238,50 @@ impl AsyncFileReader for ReqwestReader { pub struct PrefetchReader { reader: Arc, buffer: Bytes, + tile_info_cache: Mutex<(Range, Bytes)>, } impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { let buffer = reader.get_metadata_bytes(0..prefetch).await?; - Ok(Self { reader, buffer }) + let tile_info_cache = Mutex::new((0..0, Bytes::new())); + Ok(Self { + reader, + buffer, + tile_info_cache, + }) } } impl AsyncFileReader for PrefetchReader { fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { - if range.start < self.buffer.len() as _ { - if range.end < self.buffer.len() as _ { - let usize_range = range.start as usize..range.end as usize; - let result = self.buffer.slice(usize_range); - async { Ok(result) }.boxed() - } else { - // TODO: reuse partial internal buffer - self.reader.get_metadata_bytes(range) - } + if range.end < self.buffer.len() as _ { + let usize_range = range.start as usize..range.end as usize; + let result = self.buffer.slice(usize_range); + async { Ok(result) }.boxed() } else { - self.reader.get_metadata_bytes(range) + async move { + { + let lock = self.tile_info_cache.lock().await; + // let (c_range, cache) = (lock.0, lock.1); + if range.start >= lock.0.start && range.end <= lock.0.end { + let usize_range = (range.start - lock.0.start) as usize + ..(range.end - lock.0.start) as usize; + return Ok(lock.1.slice(usize_range)); + } + } + let range_len = range.end - range.start; + let estimate = 2 * (range_len + range_len.isqrt()); + let new_c_range = range.start..range.start + estimate; + let res = self.reader.get_metadata_bytes(new_c_range.clone()).await?; + { + let mut lock = self.tile_info_cache.lock().await; + *lock = (new_c_range, res.clone()); + } + Ok(res.slice(0..range_len as _)) + } + .boxed() } } From feefaddee4168b304a20635b2265d0e5e415727c Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Wed, 26 Mar 2025 04:34:11 +0100 Subject: [PATCH 09/11] added docs+doctest to prefetchreader --- Cargo.toml | 1 + src/reader.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0725e3a..1e2c165 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ tokio = { version = "1.9", features = [ "macros", "fs", "rt-multi-thread", + # "rt", "io-util", ] } diff --git a/src/reader.rs b/src/reader.rs index f48addb..48cce78 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -17,7 +17,7 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; use async_mutex::Mutex; #[cfg(feature = "tokio")] use tokio::sync::Mutex; -#[cfg(not(any(feature="tokio", feature="async_mutex")))] +#[cfg(not(any(feature = "tokio", feature = "async_mutex")))] compile_error!("at least one of 'tokio' or 'async_mutex' features should be enabled"); /// The asynchronous interface used to read COG files @@ -233,7 +233,69 @@ impl AsyncFileReader for ReqwestReader { } } -/// An AsyncFileReader that caches the first `prefetch` bytes of a file. +impl AsyncFileReader for Bytes { + fn get_image_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + self.get_metadata_bytes(range) + } + + fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + if range.end <= self.len() as _ { + let usize_range = range.start as usize..range.end as usize; + async { Ok(self.slice(usize_range)) }.boxed() + } else { + let range_len = range.end - range.start; + let range_read = range_len - (range.end-self.len() as u64); + async move { Err(AsyncTiffError::EndOfFile(range_len, range_read)) }.boxed() + } + } +} + +/// An AsyncFileReader that caches the first `prefetch` bytes of a file. Also +/// holds a cache for out-of-prefetch metadata. +/// +/// When a request is out-of-bounds, an estimate of the remaining +/// `tile_offsets`/`tile_byte_counts` array is made as `2*(len+2*len.isqrt())` +/// +/// # Examples +/// +/// ``` +/// # futures::executor::block_on( async { // https://stackoverflow.com/a/64597248/14681457 +/// # use async_tiff::error::AsyncTiffError; +/// # use async_tiff::reader::{AsyncFileReader, PrefetchReader}; +/// # use bytes::Bytes; +/// # use std::sync::Arc; +/// # +/// # let file = Bytes::from_static(&[42u8; 128 * 1024]); +/// # +/// // create a reader that prefetches the first 16 kB +/// let reader = PrefetchReader::new(Arc::new(file), 16 * 1024).await?; +/// +/// // get some data from the prefetch +/// assert_eq!(*reader.get_metadata_bytes(42..1312).await?, [42; 1312 - 42]); +/// +/// const start: u64 = 16 * 1024; +/// const len: u64 = 16 * 1024; +/// // get some data from outside the prefetch range +/// assert_eq!( +/// *reader.get_metadata_bytes(start..start + len).await?, +/// [42; 16 * 1024] +/// ); +/// +/// // this is now also (exactly) cached +/// assert_eq!( +/// *reader +/// .get_metadata_bytes(start..start + 2 * (len + 2 * len.isqrt())) +/// .await?, +/// [42; 2 * (len + 2 * len.isqrt()) as usize] +/// ); +/// +/// // this will not check the cache +/// reader.get_image_bytes(start..start + len).await?; +/// +/// # Ok::<(),AsyncTiffError>(()) +/// # }); +/// ``` +#[cfg(any(feature = "tokio", feature = "async_mutex"))] #[derive(Debug)] pub struct PrefetchReader { reader: Arc, @@ -241,6 +303,7 @@ pub struct PrefetchReader { tile_info_cache: Mutex<(Range, Bytes)>, } +#[cfg(any(feature = "tokio", feature = "async_mutex"))] impl PrefetchReader { /// Construct a new PrefetchReader, catching the first `prefetch` bytes of the file. pub async fn new(reader: Arc, prefetch: u64) -> AsyncTiffResult { @@ -254,8 +317,10 @@ impl PrefetchReader { } } +#[cfg(any(feature = "tokio", feature = "async_mutex"))] impl AsyncFileReader for PrefetchReader { fn get_metadata_bytes(&self, range: Range) -> BoxFuture<'_, AsyncTiffResult> { + // check prefetch if range.end < self.buffer.len() as _ { let usize_range = range.start as usize..range.end as usize; let result = self.buffer.slice(usize_range); @@ -263,22 +328,27 @@ impl AsyncFileReader for PrefetchReader { } else { async move { { + // check cache let lock = self.tile_info_cache.lock().await; - // let (c_range, cache) = (lock.0, lock.1); if range.start >= lock.0.start && range.end <= lock.0.end { let usize_range = (range.start - lock.0.start) as usize ..(range.end - lock.0.start) as usize; return Ok(lock.1.slice(usize_range)); } } + // determine new cache size let range_len = range.end - range.start; - let estimate = 2 * (range_len + range_len.isqrt()); + let estimate = 2 * (range_len + 2 * range_len.isqrt()).max(8 * 1024); let new_c_range = range.start..range.start + estimate; + + // put in new cache let res = self.reader.get_metadata_bytes(new_c_range.clone()).await?; { let mut lock = self.tile_info_cache.lock().await; *lock = (new_c_range, res.clone()); } + + // yay Ok(res.slice(0..range_len as _)) } .boxed() From feefadde640ba35237bd7a39a8b40a9eacbf8d3a Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Wed, 26 Mar 2025 06:49:57 +0100 Subject: [PATCH 10/11] made minimum block size equal to prefetch, added explanations --- src/reader.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 48cce78..0c521b6 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -244,7 +244,7 @@ impl AsyncFileReader for Bytes { async { Ok(self.slice(usize_range)) }.boxed() } else { let range_len = range.end - range.start; - let range_read = range_len - (range.end-self.len() as u64); + let range_read = range_len - (range.end - self.len() as u64); async move { Err(AsyncTiffError::EndOfFile(range_len, range_read)) }.boxed() } } @@ -254,7 +254,9 @@ impl AsyncFileReader for Bytes { /// holds a cache for out-of-prefetch metadata. /// /// When a request is out-of-bounds, an estimate of the remaining -/// `tile_offsets`/`tile_byte_counts` array is made as `2*(len+2*len.isqrt())` +/// `tile_offsets`+`tile_byte_counts` array is made as +/// `(2*len+4*len.isqrt()).max(prefetch)`, where `len` is the length of the +/// requested range and `prefetch` is the size of the prefetch buffer. /// /// # Examples /// @@ -280,7 +282,7 @@ impl AsyncFileReader for Bytes { /// *reader.get_metadata_bytes(start..start + len).await?, /// [42; 16 * 1024] /// ); -/// +/// /// // this is now also (exactly) cached /// assert_eq!( /// *reader @@ -291,7 +293,7 @@ impl AsyncFileReader for Bytes { /// /// // this will not check the cache /// reader.get_image_bytes(start..start + len).await?; -/// +/// /// # Ok::<(),AsyncTiffError>(()) /// # }); /// ``` @@ -338,7 +340,20 @@ impl AsyncFileReader for PrefetchReader { } // determine new cache size let range_len = range.end - range.start; - let estimate = 2 * (range_len + 2 * range_len.isqrt()).max(8 * 1024); + // estimate (for bigtiff): + // each overview is 1/4 the previous =geometric series=> 4/3*range_len + // assume request was TileOffsets =Long8+Long=> 3/2*4/3*range_len = 2*range_len + // + // add edge of one overview down: + // n_tiles0 = range_len/8 + // n_tiles1 = 1/4*n_tiles0 + // edge_tiles1 ≈ √n_tiles1 = √range_len/√2⁶ = √range_len/(4√2) + // each edge is 1/2 the previous =geometric series=> 2*edge_tiles1 = √range_len/(2√2) + // edge_bytes = 8*√range_len/(2√2) = 4/√2*√range_len + // Long8+Long => 3/2*4/√2*√range_len = 6/√2*√range_len ≈ 4*√range_len + // + // 2*range_len+4*√range_len + let estimate = (2 * range_len + 4 * range_len.isqrt()).max(self.buffer.len() as _); let new_c_range = range.start..range.start + estimate; // put in new cache From feefaddecfa179e7a0e2ad2ad7733a140744341d Mon Sep 17 00:00:00 2001 From: Fee Fladder Date: Wed, 26 Mar 2025 09:45:33 +0100 Subject: [PATCH 11/11] removed needless compile error --- src/reader.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 0c521b6..1c4d78d 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -17,8 +17,6 @@ use crate::error::{AsyncTiffError, AsyncTiffResult}; use async_mutex::Mutex; #[cfg(feature = "tokio")] use tokio::sync::Mutex; -#[cfg(not(any(feature = "tokio", feature = "async_mutex")))] -compile_error!("at least one of 'tokio' or 'async_mutex' features should be enabled"); /// The asynchronous interface used to read COG files ///