diff --git a/python/src/tiff.rs b/python/src/tiff.rs index 861c519..e334778 100644 --- a/python/src/tiff.rs +++ b/python/src/tiff.rs @@ -37,7 +37,7 @@ impl PyTIFF { .map_err(|err| PyFileNotFoundError::new_err(err.to_string()))?; let mut metadata_reader = TiffMetadataReader::try_open(&metadata_fetch).await.unwrap(); let ifds = metadata_reader - .read_all_ifds(&metadata_fetch) + .read_all_ifds(&metadata_fetch, Default::default()) .await .unwrap(); let tiff = TIFF::new(ifds); diff --git a/src/cog.rs b/src/cog.rs index 99a0f14..545e263 100644 --- a/src/cog.rs +++ b/src/cog.rs @@ -44,7 +44,7 @@ mod test { .await .unwrap(); let ifds = metadata_reader - .read_all_ifds(&prefetch_reader) + .read_all_ifds(&prefetch_reader, Default::default()) .await .unwrap(); let tiff = TIFF::new(ifds); diff --git a/src/ifd.rs b/src/ifd.rs index f0da9c1..5a032d0 100644 --- a/src/ifd.rs +++ b/src/ifd.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::fmt::Debug; use std::ops::Range; use bytes::Bytes; @@ -6,6 +7,7 @@ use num_enum::TryFromPrimitive; use crate::error::{AsyncTiffError, AsyncTiffResult}; use crate::geo::{GeoKeyDirectory, GeoKeyTag}; +use crate::metadata::extra_tags::ExtraTagsRegistry; use crate::predictor::PredictorInfo; use crate::reader::{AsyncFileReader, Endianness}; use crate::tiff::tags::{ @@ -133,6 +135,8 @@ pub struct ImageFileDirectory { pub(crate) copyright: Option, + pub(crate) extra_tags: ExtraTagsRegistry, + // Geospatial tags pub(crate) geo_key_directory: Option, pub(crate) model_pixel_scale: Option>, @@ -149,6 +153,7 @@ impl ImageFileDirectory { pub fn from_tags( tag_data: HashMap, endianness: Endianness, + extra_tags_registry: ExtraTagsRegistry, ) -> AsyncTiffResult { let mut new_subfile_type = None; let mut image_width = None; @@ -262,8 +267,19 @@ impl ImageFileDirectory { // Tag::GdalNodata // Tags for which the tiff crate doesn't have a hard-coded enum variant Tag::Unknown(DOCUMENT_NAME) => document_name = Some(value.into_string()?), - _ => { - 
other_tags.insert(tag, value); + t => { + if extra_tags_registry.contains(&t) { + extra_tags_registry[&t].process_tag(t, value).map_err(|e| { + if let AsyncTiffError::InternalTIFFError(err) = e { + err + } else { + // TODO fix error handling. This is bad + TiffError::IntSizeError + } + })?; + } else { + other_tags.insert(tag, value); + } } }; Ok::<_, TiffError>(()) @@ -398,6 +414,7 @@ impl ImageFileDirectory { geo_key_directory, model_pixel_scale, model_tiepoint, + extra_tags: extra_tags_registry, other_tags, }) } @@ -634,6 +651,11 @@ impl ImageFileDirectory { self.model_tiepoint.as_deref() } + /// the registry holding extra tags + pub fn extra_tags(&self) -> &ExtraTagsRegistry { + &self.extra_tags + } + /// Tags for which the tiff crate doesn't have a hard-coded enum variant. pub fn other_tags(&self) -> &HashMap { &self.other_tags diff --git a/src/metadata/extra_tags.rs b/src/metadata/extra_tags.rs new file mode 100644 index 0000000..2258034 --- /dev/null +++ b/src/metadata/extra_tags.rs @@ -0,0 +1,365 @@ +//! # Register parsers for additional tags +//! +//! Simplified example for exif tags parser +//! +//! ``` +//! # use std::sync::{LazyLock, OnceLock, Arc}; +//! # use std::env::current_dir; +//! # use async_tiff::tiff::{Value, tags::Tag}; +//! # use async_tiff::error::AsyncTiffResult; +//! # use async_tiff::reader::{ObjectReader, AsyncFileReader}; +//! # use async_tiff::metadata::TiffMetadataReader; +//! use async_tiff::metadata::extra_tags::{ExtraTags, ExtraTagsRegistry}; +//! # use object_store::local::LocalFileSystem; +//! // see https://www.media.mit.edu/pia/Research/deepview/exif.html#ExifTags +//! // or exif spec: https://www.cipa.jp/std/documents/download_e.html?DC-008-Translation-2023-E +//! // / all tags processed by your extension +//! pub const EXIF_TAGS: [Tag; 3] = [ +//! Tag::Unknown(34665), // Exif IFD pointer +//! Tag::Unknown(34853), // GPS IFD pointer +//! Tag::Unknown(40965), // Interoperability IFD pointer +//! ]; +//! +//! 
// / the struct that stores the data (using interior mutability) +//! #[derive(Debug, Clone, Default)] +//! pub struct ExifTags { +//! pub exif: OnceLock, +//! pub gps: OnceLock, +//! pub interop: OnceLock, +//! // would also hold e.g. a TiffMetadataReader to read exif IFDs +//! } +//! +//! impl ExtraTags for ExifTags { +//! fn tags(&self) -> &'static [Tag] { +//! &EXIF_TAGS +//! } +//! +//! fn process_tag(&self, tag:Tag, value: Value) -> AsyncTiffResult<()> { +//! match tag { +//! Tag::Unknown(34665) => self.exif.set(value.into_u32()?).unwrap(), +//! Tag::Unknown(34853) => self.gps.set(value.into_u32()?).unwrap(), +//! Tag::Unknown(40965) => self.interop.set(value.into_u32()?).unwrap(), +//! _ => {} +//! } +//! Ok(()) +//! } +//! } +//! +//! #[tokio::main] +//! async fn main() { +//! // create an empty registry +//! let mut registry = ExtraTagsRegistry::new(); +//! // register our custom extra tags +//! registry.register(Arc::new(ExifTags::default())); +//! +//! let store = Arc::new(LocalFileSystem::new_with_prefix(current_dir().unwrap()).unwrap()); +//! let path = "tests/sample-exif.tiff"; +//! let reader = +//! Arc::new(ObjectReader::new(store.clone(), path.into())) as Arc; +//! let mut metadata_reader = TiffMetadataReader::try_open(&reader).await.unwrap(); +//! // get the first ifd +//! let ifd = &metadata_reader +//! .read_all_ifds(&reader, registry) +//! .await +//! .unwrap()[0]; +//! +//! // access by any of our registered tags +//! let exif = ifd.extra_tags()[&EXIF_TAGS[0]] +//! .clone() +//! .as_any_arc() +//! .downcast::() +//! .unwrap(); +//! assert!(exif.exif.get().is_some()); +//! assert!(exif.gps.get().is_some()); +//! // our image doesn't have interop info +//! assert!(exif.interop.get().is_none()); +//! } +//! 
``` + +use crate::error::{AsyncTiffError, AsyncTiffResult}; +use crate::tiff::tags::Tag; +use crate::tiff::Value; +use std::any::Any; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use std::ops::Index; +use std::sync::Arc; + +/// Trait to implement for custom tags, such as Geo, EXIF, OME, etc +/// +/// your type should also implement `Clone` for blanket implementations of [`ExtraTagsBlankets`] +/// +/// ``` +/// # use async_tiff::tiff::{Value, tags::Tag}; +/// # use async_tiff::error::AsyncTiffResult; +/// use async_tiff::metadata::extra_tags::ExtraTags; +/// # use std::sync::OnceLock; +/// +/// pub const EXIF_TAGS: [Tag; 3] = [ +/// Tag::Unknown(34665), // Exif IFD pointer +/// Tag::Unknown(34853), // GPS IFD pointer +/// Tag::Unknown(40965), // Interoperability IFD pointer +/// ]; +/// +/// // / the struct that stores the data (using interior mutability) +/// #[derive(Debug, Clone, Default)] +/// pub struct ExifTags { +/// pub exif: OnceLock, +/// pub gps: OnceLock, +/// pub interop: OnceLock, +/// // would also hold e.g. a TiffMetadataReader to read exif IFDs +/// } +/// +/// impl ExtraTags for ExifTags { +/// fn tags(&self) -> &'static [Tag] { +/// &EXIF_TAGS +/// } +/// +/// fn process_tag(&self, tag:Tag, value: Value) -> AsyncTiffResult<()> { +/// match tag { +/// Tag::Unknown(34665) => self.exif.set(value.into_u32()?).unwrap(), +/// Tag::Unknown(34853) => self.gps.set(value.into_u32()?).unwrap(), +/// Tag::Unknown(40965) => self.interop.set(value.into_u32()?).unwrap(), +/// _ => {} +/// } +/// Ok(()) +/// } +/// } +/// ``` +// Send + Sync are required for Python, where `dyn ExtraTags` needs `Send` and `Sync` +pub trait ExtraTags: ExtraTagsBlankets + Any + Debug + Send + Sync { + /// a list of tags this entry processes + /// + /// e.g. 
for Geo this would be [34735, 34736, 34737] + fn tags(&self) -> &'static [Tag]; + /// process a single tag, using interior mutability if needed + fn process_tag(&self, tag: Tag, value: Value) -> AsyncTiffResult<()>; +} + +/// Extra trait with blanket implementations for object-safe cloning and casting +/// +/// Automatically implemented if your type implements [`ExtraTags`] and [`Clone`] +/// +/// ``` +/// # use std::sync::Arc; +/// # use async_tiff::tiff::{Value, tags::Tag}; +/// # use async_tiff::error::AsyncTiffResult; +/// use async_tiff::metadata::extra_tags::ExtraTags; +/// // derive these +/// #[derive(Debug, Clone)] +/// pub struct MyTags; +/// +/// // custom functionality +/// impl MyTags { +/// fn forty_two(&self) -> u32 {42} +/// } +/// +/// // implement ExtraTags +/// impl ExtraTags for MyTags { +/// fn tags(&self) -> &'static [Tag] { +/// &[] +/// } +/// +/// fn process_tag(&self, _tag:Tag, _value:Value) -> AsyncTiffResult<()> { +/// Ok(()) +/// } +/// } +/// +/// fn main() { +/// // allows for deep cloning +/// let my_tags = Arc::new(MyTags) as Arc; +/// let other_my_tags = my_tags.clone_arc(); +/// assert!(Arc::ptr_eq(&my_tags, &my_tags.clone())); +/// assert!(!Arc::ptr_eq(&my_tags, &other_my_tags)); +/// +/// // and downcasting +/// let my_tags_concrete = my_tags.as_any_arc().downcast::().unwrap(); +/// assert_eq!(my_tags_concrete.forty_two(), 42); +/// } +/// ``` +/// +/// This works since blanket implementations are done on concrete types and only +/// their signatures (function pointer) will end up in the vtable +/// +pub trait ExtraTagsBlankets { + /// deep clone + fn clone_arc(&self) -> Arc; + /// convert to any for downcasting + fn as_any_arc(self: Arc) -> Arc; +} + +impl ExtraTagsBlankets for T +where + T: 'static + ExtraTags + Clone, +{ + fn clone_arc(&self) -> Arc { + Arc::new(self.clone()) + } + + fn as_any_arc(self: Arc) -> Arc { + self + } +} + +/// The registry in which extra tags (parsers) are registered +/// +/// Pass this to 
[`crate::metadata::TiffMetadataReader`] when reading. +/// +/// ``` +/// # use async_tiff::reader::{AsyncFileReader, ObjectReader}; +/// # use async_tiff::metadata::TiffMetadataReader; +/// use async_tiff::metadata::extra_tags::ExtraTagsRegistry; +/// # use std::sync::Arc; +/// # use std::env::current_dir; +/// # use object_store::local::LocalFileSystem; +/// +/// #[tokio::main] +/// async fn main() { +/// let registry = ExtraTagsRegistry::default(); +/// +/// let store = Arc::new(LocalFileSystem::new_with_prefix(current_dir().unwrap()).unwrap()); +/// # let path = "tests/sample-exif.tiff"; +/// let reader = +/// Arc::new(ObjectReader::new(store.clone(), path.into())) as Arc; +/// let mut metadata_reader = TiffMetadataReader::try_open(&reader).await.unwrap(); +/// // get first ifd +/// let ifd = &metadata_reader +/// .read_all_ifds(&reader, registry) +/// .await +/// .unwrap()[0]; +/// // retrieve the registry +/// println!("{:?}",ifd.extra_tags()); +/// } +/// ``` +/// +#[derive(Debug, Clone)] +pub struct ExtraTagsRegistry(HashMap>); + +impl ExtraTagsRegistry { + /// Create a new, empty `ExtraTagsRegistry` + pub fn new() -> Self { + Self(HashMap::new()) + } + /// checks if we have an entry for this tag + pub fn contains(&self, tag: &Tag) -> bool { + self.0.contains_key(tag) + } + /// Register an ExtraTags so their tags are parsed and stored in the ifd's `extra_tags` + pub fn register(&mut self, tags: Arc) -> AsyncTiffResult<()> { + // check for duplicates + for tag in tags.tags() { + if self.0.contains_key(tag) { + return Err(AsyncTiffError::General(format!( + "Tag {tag:?} already registered in {self:?}!" 
+ ))); + } + } + // add to self + for tag in tags.tags() { + self.0.insert(*tag, tags.clone()); + } + Ok(()) + } + + /// deep clone so we have different registries per IFD + pub(crate) fn deep_clone(&self) -> Self { + let mut new_registry = ExtraTagsRegistry::new(); + + // we need to do some magic, because we can have multiple tags pointing to the same arc + let mut seen = HashSet::new(); + for extra_tags in self.0.values() { + // only add if this is the first encountered reference to this arc + // (using thin pointer equality: https://stackoverflow.com/a/67114787/14681457 ; https://github.com/rust-lang/rust/issues/46139#issuecomment-346971153) + if seen.insert(Arc::as_ptr(extra_tags) as *const ()) { + if let Err(e) = new_registry.register(extra_tags.clone_arc()) { + panic!("{e}"); + } + } + } + + new_registry + } +} + +impl Default for ExtraTagsRegistry { + fn default() -> Self { + Self::new() // add e.g. geo tags later + } +} + +impl Index<&Tag> for ExtraTagsRegistry { + type Output = Arc; + fn index(&self, index: &Tag) -> &Self::Output { + &self.0[index] + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::LazyLock; + + #[derive(Debug, Clone, PartialEq)] + struct TestyTag; + + static TESTY_TAGS: LazyLock> = LazyLock::new(|| { + vec![ + Tag::from_u16_exhaustive(u16::MAX), + Tag::from_u16_exhaustive(u16::MAX - 1), + ] + }); + + impl ExtraTags for TestyTag { + fn tags(&self) -> &'static [Tag] { + &TESTY_TAGS + } + + fn process_tag( + &self, + tag: Tag, + value: crate::tiff::Value, + ) -> crate::error::AsyncTiffResult<()> { + println!("received {tag:?}: {value:?}"); + Ok(()) + } + } + + #[test] + fn test_register() { + let mut registry = ExtraTagsRegistry::new(); + assert!(registry.0.is_empty()); + let a1: Arc = Arc::new(TestyTag); + registry.register(a1.clone()).unwrap(); + assert_eq!(registry.0.len(), TestyTag.tags().len()); + for tag in a1.tags() { + // very strict equality check + assert!(Arc::ptr_eq(&registry.0[tag], &a1)); + } + } + + #[test] + fn 
test_overlap_err() { + let mut registry = ExtraTagsRegistry::new(); + assert!(registry.0.is_empty()); + registry.register(Arc::new(TestyTag)).unwrap(); + assert!(matches!( + registry.register(Arc::new(TestyTag)).unwrap_err(), + AsyncTiffError::General(_) + )); + } + + #[test] + fn test_deep_clone() { + let mut registry = ExtraTagsRegistry::new(); + let a1: Arc = Arc::new(TestyTag); + registry.register(a1.clone()).unwrap(); + let r2 = registry.deep_clone(); + for tags in a1.tags().windows(2) { + // all should refer to the same Arc + assert!(Arc::ptr_eq(&r2.0[&tags[0]], &r2.0[&tags[1]])); + // which is different from the previous + assert!(!Arc::ptr_eq(&a1, &r2.0[&tags[0]])); + assert!(!Arc::ptr_eq(&a1, &r2.0[&tags[1]])); + } + } +} diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs index 3592014..b86ca18 100644 --- a/src/metadata/mod.rs +++ b/src/metadata/mod.rs @@ -40,7 +40,7 @@ //! //! // Read all IFDs out of the source. //! let ifds = metadata_reader -//! .read_all_ifds(&prefetch_reader) +//! .read_all_ifds(&prefetch_reader, Default::default()) //! .await //! .unwrap(); //! # }) @@ -58,8 +58,10 @@ //! fetches the first `N` bytes out of a file. //! 
+pub mod extra_tags; mod fetch; mod reader; +// pub use extra_tags::{ExtraTags, ExtraTagsRegistry}; pub use fetch::{MetadataFetch, PrefetchBuffer}; pub use reader::{ImageFileDirectoryReader, TiffMetadataReader}; diff --git a/src/metadata/reader.rs b/src/metadata/reader.rs index 6a72ff9..cdb49c9 100644 --- a/src/metadata/reader.rs +++ b/src/metadata/reader.rs @@ -4,6 +4,7 @@ use std::io::Read; use bytes::Bytes; use crate::error::{AsyncTiffError, AsyncTiffResult}; +use crate::metadata::extra_tags::ExtraTagsRegistry; use crate::metadata::fetch::MetadataCursor; use crate::metadata::MetadataFetch; use crate::reader::Endianness; @@ -110,12 +111,13 @@ impl TiffMetadataReader { pub async fn read_next_ifd( &mut self, fetch: &F, + extra_tags_registry: ExtraTagsRegistry, ) -> AsyncTiffResult> { if let Some(ifd_start) = self.next_ifd_offset { let ifd_reader = ImageFileDirectoryReader::open(fetch, ifd_start, self.bigtiff, self.endianness) .await?; - let ifd = ifd_reader.read(fetch).await?; + let ifd = ifd_reader.read(fetch, extra_tags_registry).await?; let next_ifd_offset = ifd_reader.finish(fetch).await?; self.next_ifd_offset = next_ifd_offset; Ok(Some(ifd)) @@ -128,9 +130,14 @@ impl TiffMetadataReader { pub async fn read_all_ifds( &mut self, fetch: &F, + extra_tags_registry: ExtraTagsRegistry, ) -> AsyncTiffResult> { let mut ifds = vec![]; - while let Some(ifd) = self.read_next_ifd(fetch).await? { + // deep clone the extra_tags_registry so we can have different values + while let Some(ifd) = self + .read_next_ifd(fetch, extra_tags_registry.deep_clone()) + .await? + { ifds.push(ifd); } Ok(ifds) @@ -220,13 +227,17 @@ impl ImageFileDirectoryReader { /// /// Keep in mind that you'll still need to call [`finish`][Self::finish] to get the byte offset /// of the next IFD. 
- pub async fn read(&self, fetch: &F) -> AsyncTiffResult { + pub async fn read( + &self, + fetch: &F, + extra_tags_registry: ExtraTagsRegistry, + ) -> AsyncTiffResult { let mut tags = HashMap::with_capacity(self.tag_count as usize); for tag_idx in 0..self.tag_count { let (tag, value) = self.read_tag(fetch, tag_idx).await?; tags.insert(tag, value); } - ImageFileDirectory::from_tags(tags, self.endianness) + ImageFileDirectory::from_tags(tags, self.endianness, extra_tags_registry) } /// Finish this reader, reading the byte offset of the next IFD diff --git a/src/predictor.rs b/src/predictor.rs index 34d6fe5..a206821 100644 --- a/src/predictor.rs +++ b/src/predictor.rs @@ -168,6 +168,7 @@ pub(crate) fn unpredict_hdiff( // From image-tiff /// /// This should be used _after_ endianness fixing +/// pub fn rev_hpredict_nsamp(buf: &mut [u8], bit_depth: u16, samples: usize) { match bit_depth { 0..=8 => { diff --git a/tests/image_tiff/util.rs b/tests/image_tiff/util.rs index 0b7e0c3..b00a4ad 100644 --- a/tests/image_tiff/util.rs +++ b/tests/image_tiff/util.rs @@ -14,6 +14,9 @@ pub(crate) async fn open_tiff(filename: &str) -> TIFF { let reader = Arc::new(ObjectReader::new(store.clone(), path.as_str().into())) as Arc; let mut metadata_reader = TiffMetadataReader::try_open(&reader).await.unwrap(); - let ifds = metadata_reader.read_all_ifds(&reader).await.unwrap(); + let ifds = metadata_reader + .read_all_ifds(&reader, Default::default()) + .await + .unwrap(); TIFF::new(ifds) } diff --git a/tests/sample-exif.tiff b/tests/sample-exif.tiff new file mode 100644 index 0000000..015cd39 Binary files /dev/null and b/tests/sample-exif.tiff differ diff --git a/tests/util/mod.rs b/tests/util/mod.rs index 8b3fa07..7eae8b1 100644 --- a/tests/util/mod.rs +++ b/tests/util/mod.rs @@ -17,7 +17,7 @@ pub(crate) async fn open_remote_tiff(url: &str) -> TIFF { .await .unwrap(); let ifds = metadata_reader - .read_all_ifds(&prefetch_reader) + .read_all_ifds(&prefetch_reader, Default::default()) .await 
.unwrap(); TIFF::new(ifds)