diff --git a/Cargo.toml b/Cargo.toml index 3c58d05..864fe3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ description = "Low-level asynchronous TIFF reader." readme = "README.md" [dependencies] +bytemuck = { version = "1.24.0", features = ["extern_crate_std"] } byteorder = "1" bytes = "1.7.0" flate2 = "1.0.20" diff --git a/src/error.rs b/src/error.rs index 8c63711..91b5a09 100644 --- a/src/error.rs +++ b/src/error.rs @@ -42,6 +42,10 @@ pub enum AsyncTiffError { #[error(transparent)] ReqwestError(#[from] reqwest::Error), + /// Bytemuck error + #[error(transparent)] + BytemuckError(#[from] bytemuck::PodCastError), + /// External error #[error(transparent)] External(Box), diff --git a/src/metadata/fetch.rs b/src/metadata/fetch.rs index 126b3ae..8535830 100644 --- a/src/metadata/fetch.rs +++ b/src/metadata/fetch.rs @@ -6,6 +6,13 @@ use futures::FutureExt; use crate::error::AsyncTiffResult; use crate::reader::{AsyncFileReader, EndianAwareReader, Endianness}; +use crate::tiff::Value; + +#[cfg(target_endian = "little")] +const MACHINE_ENDIANNESS: Endianness = Endianness::LittleEndian; + +#[cfg(target_endian = "big")] +const MACHINE_ENDIANNESS: Endianness = Endianness::BigEndian; /// A data source that can be used with [`TiffMetadataReader`] and [`ImageFileDirectoryReader`] to /// load [`ImageFileDirectory`]s. @@ -105,14 +112,19 @@ impl<'a, F: MetadataFetch> MetadataCursor<'a, F> { Ok(EndianAwareReader::new(bytes, self.endianness)) } - /// Read a u8 from the cursor, advancing the internal state by 1 byte. - pub(crate) async fn read_u8(&mut self) -> AsyncTiffResult { - self.read(1).await?.read_u8() + /// Read `n` u8s from the cursor, advancing the internal state by `n` bytes. + pub(crate) async fn read_u8_n(&mut self, n: u64) -> AsyncTiffResult> { + let (buf, _endianness) = self.read(n).await?.into_inner(); + Ok(buf.into()) } - /// Read a i8 from the cursor, advancing the internal state by 1 byte. 
- pub(crate) async fn read_i8(&mut self) -> AsyncTiffResult { - self.read(1).await?.read_i8() + /// Read `n` i8s from the cursor, advancing the internal state by `n` bytes. + pub(crate) async fn read_i8_n(&mut self, n: u64) -> AsyncTiffResult { + let (buf, _endianness) = self.read(n).await?.into_inner(); + let values: &[i8] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::SignedByte).collect(), + )) } /// Read a u16 from the cursor, advancing the internal state by 2 bytes. @@ -120,9 +132,44 @@ impl<'a, F: MetadataFetch> MetadataCursor<'a, F> { self.read(2).await?.read_u16() } - /// Read a i16 from the cursor, advancing the internal state by 2 bytes. - pub(crate) async fn read_i16(&mut self) -> AsyncTiffResult { - self.read(2).await?.read_i16() + /// Read `n` u16s from the cursor, advancing the internal state by `n * 2` bytes. + pub(crate) async fn read_u16_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 2).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[u16] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Short).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Short(reader.read_u16()?)) + } + Ok(Value::List(v)) + } + } + + /// Read `n` i16s from the cursor, advancing the internal state by `n * 2` bytes. + pub(crate) async fn read_i16_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 2).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. 
+ if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[i16] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::SignedShort).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::SignedShort(reader.read_i16()?)) + } + Ok(Value::List(v)) + } } /// Read a u32 from the cursor, advancing the internal state by 4 bytes. @@ -130,26 +177,182 @@ impl<'a, F: MetadataFetch> MetadataCursor<'a, F> { self.read(4).await?.read_u32() } + /// Read `n` u32s from the cursor, advancing the internal state by `n * 4` bytes. + pub(crate) async fn read_u32_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 4).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[u32] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Unsigned).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Unsigned(reader.read_u32()?)) + } + Ok(Value::List(v)) + } + } + + /// Read `n` Value::IFDs from the cursor, advancing the internal state by `n * 4` bytes. + pub(crate) async fn read_ifd_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 4).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[u32] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Ifd).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Ifd(reader.read_u32()?)) + } + Ok(Value::List(v)) + } + } + /// Read a i32 from the cursor, advancing the internal state by 4 bytes. 
 pub(crate) async fn read_i32(&mut self) -> AsyncTiffResult { self.read(4).await?.read_i32() } + /// Read `n` i32s from the cursor, advancing the internal state by `n * 4` bytes. + pub(crate) async fn read_i32_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 4).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[i32] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Signed).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Signed(reader.read_i32()?)) + } + Ok(Value::List(v)) + } + } + /// Read a u64 from the cursor, advancing the internal state by 8 bytes. pub(crate) async fn read_u64(&mut self) -> AsyncTiffResult { self.read(8).await?.read_u64() } + /// Read `n` u64s from the cursor, advancing the internal state by `n * 8` bytes. + pub(crate) async fn read_u64_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 8).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[u64] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::UnsignedBig).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::UnsignedBig(reader.read_u64()?)) + } + Ok(Value::List(v)) + } + } + + /// Read `n` Value::IfdBigs from the cursor, advancing the internal state by `n * 8` bytes. + pub(crate) async fn read_ifd8_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 8).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. 
+ if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[u64] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::IfdBig).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::IfdBig(reader.read_u64()?)) + } + Ok(Value::List(v)) + } + } + /// Read a i64 from the cursor, advancing the internal state by 8 bytes. pub(crate) async fn read_i64(&mut self) -> AsyncTiffResult { self.read(8).await?.read_i64() } - pub(crate) async fn read_f32(&mut self) -> AsyncTiffResult { - self.read(4).await?.read_f32() + /// Read `n` i64s from the cursor, advancing the internal state by `n * 8` bytes. + pub(crate) async fn read_i64_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 8).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[i64] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::SignedBig).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::SignedBig(reader.read_i64()?)) + } + Ok(Value::List(v)) + } + } + + /// Read `n` f32s from the cursor, advancing the internal state by `n * 4` bytes. + pub(crate) async fn read_f32_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 4).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. 
+ if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[f32] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Float).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Float(reader.read_f32()?)) + } + Ok(Value::List(v)) + } } pub(crate) async fn read_f64(&mut self) -> AsyncTiffResult { self.read(8).await?.read_f64() } + + /// Read `n` f64s from the cursor, advancing the internal state by `n * 8` bytes. + pub(crate) async fn read_f64_n(&mut self, n: u64) -> AsyncTiffResult { + let mut reader = self.read(n * 8).await?; + + // If the endianness matches the machine endianness, we can do a direct cast. + if self.endianness == MACHINE_ENDIANNESS { + let (buf, _endianness) = reader.into_inner(); + let values: &[f64] = bytemuck::try_cast_slice(&buf)?; + Ok(Value::List( + values.iter().copied().map(Value::Double).collect(), + )) + } else { + let mut v = Vec::with_capacity(n as _); + for _ in 0..n { + v.push(Value::Double(reader.read_f64()?)) + } + Ok(Value::List(v)) + } + } } diff --git a/src/metadata/reader.rs b/src/metadata/reader.rs index ae5de16..6f6df84 100644 --- a/src/metadata/reader.rs +++ b/src/metadata/reader.rs @@ -502,62 +502,18 @@ async fn read_tag_value( // TODO check if this could give wrong results // at a different endianess of file/computer. 
Type::BYTE | Type::UNDEFINED => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Byte(cursor.read_u8().await?)) - } - Ok(Value::List(v)) - } - Type::SBYTE => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::SignedByte(cursor.read_i8().await?)) - } - Ok(Value::List(v)) - } - Type::SHORT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Short(cursor.read_u16().await?)) - } - Ok(Value::List(v)) - } - Type::SSHORT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::SignedShort(cursor.read_i16().await?)) - } - Ok(Value::List(v)) - } - Type::LONG => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Unsigned(cursor.read_u32().await?)) - } - Ok(Value::List(v)) - } - Type::SLONG => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Signed(cursor.read_i32().await?)) - } - Ok(Value::List(v)) - } - Type::FLOAT => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Float(cursor.read_f32().await?)) - } - Ok(Value::List(v)) - } - Type::DOUBLE => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Double(cursor.read_f64().await?)) - } - Ok(Value::List(v)) + let values = cursor.read_u8_n(count).await?; + Ok(Value::List(values.into_iter().map(Value::Byte).collect())) } + Type::SBYTE => cursor.read_i8_n(count).await, + Type::SHORT => cursor.read_u16_n(count).await, + Type::SSHORT => cursor.read_i16_n(count).await, + Type::LONG => cursor.read_u32_n(count).await, + Type::SLONG => cursor.read_i32_n(count).await, + Type::FLOAT => cursor.read_f32_n(count).await, + Type::DOUBLE => cursor.read_f64_n(count).await, Type::RATIONAL => { + // TODO: implement batched reading, i.e. 
make cursor method for this like above let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::Rational( @@ -568,6 +524,7 @@ async fn read_tag_value( Ok(Value::List(v)) } Type::SRATIONAL => { + // TODO: implement batched reading, i.e. make cursor method for this like above let mut v = Vec::with_capacity(count as _); for _ in 0..count { v.push(Value::SRational( @@ -577,34 +534,10 @@ async fn read_tag_value( } Ok(Value::List(v)) } - Type::LONG8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::UnsignedBig(cursor.read_u64().await?)) - } - Ok(Value::List(v)) - } - Type::SLONG8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::SignedBig(cursor.read_i64().await?)) - } - Ok(Value::List(v)) - } - Type::IFD => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::Ifd(cursor.read_u32().await?)) - } - Ok(Value::List(v)) - } - Type::IFD8 => { - let mut v = Vec::with_capacity(count as _); - for _ in 0..count { - v.push(Value::IfdBig(cursor.read_u64().await?)) - } - Ok(Value::List(v)) - } + Type::LONG8 => cursor.read_u64_n(count).await, + Type::SLONG8 => cursor.read_i64_n(count).await, + Type::IFD => cursor.read_ifd_n(count).await, + Type::IFD8 => cursor.read_ifd8_n(count).await, Type::ASCII => { let mut out = vec![0; count as _]; let mut buf = cursor.read(count).await?; @@ -626,6 +559,7 @@ mod test { use bytes::Bytes; use futures::FutureExt; + use crate::error::AsyncTiffError; use crate::metadata::reader::read_tag; use crate::metadata::MetadataFetch; use crate::reader::Endianness; @@ -921,7 +855,15 @@ mod test { for (buf, byte_order, res) in cases { println!("reading {buf:?} to be {res:?}"); let fetch = Bytes::from_owner(buf); - assert_eq!(read_tag(&fetch, 0, byte_order, true).await.unwrap(), (Tag::from_u16_exhaustive(0x0101), res)) + match read_tag(&fetch, 0, byte_order, true).await { + Ok((tag, value)) => { + assert_eq!(tag, 
Tag::from_u16_exhaustive(0x0101)); + assert_eq!(value, res); + } + Err(error) => { + assert!(matches!(error, AsyncTiffError::BytemuckError(_)), "an error should be from failing to cast byte slice") + } + } } } } diff --git a/src/reader.rs b/src/reader.rs index da330b4..78c0a49 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -229,7 +229,7 @@ impl AsyncFileReader for ReqwestReader { } /// Endianness -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum Endianness { /// Little Endian LittleEndian, @@ -316,9 +316,8 @@ impl EndianAwareReader { } } - #[allow(dead_code)] - pub(crate) fn into_inner(self) -> (Reader, Endianness) { - (self.reader, self.endianness) + pub(crate) fn into_inner(self) -> (Bytes, Endianness) { + (self.reader.into_inner(), self.endianness) } }