From 52bda55e901346a6e22a0afa632e336d52823770 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Sun, 23 Nov 2025 11:52:23 +1300 Subject: [PATCH] fix: Handle non-UTF-8 characters in OME-XML by using String::from_utf8_lossy, which replaces invalid byte sequences with U+FFFD instead of returning an error. Changed to another sample OME-TIFF file, and increased prefetch bytes to more than 32 KiB (the test now passes 4 MiB) to avoid hanging when parsing the StripOffsets tag. --- src/metadata/reader.rs | 4 +--- tests/ome_tiff.rs | 39 +++++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/metadata/reader.rs b/src/metadata/reader.rs index ae5de16..2375685 100644 --- a/src/metadata/reader.rs +++ b/src/metadata/reader.rs @@ -614,9 +614,7 @@ async fn read_tag_value( if let Some(first) = out.iter().position(|&b| b == 0) { out.truncate(first); } - Ok(Value::Ascii( - String::from_utf8(out).map_err(|err| AsyncTiffError::General(err.to_string()))?, - )) + Ok(Value::Ascii(String::from_utf8_lossy(&out).into_owned())) } } } diff --git a/tests/ome_tiff.rs b/tests/ome_tiff.rs index 5e5920c..4f1f9b5 100644 --- a/tests/ome_tiff.rs +++ b/tests/ome_tiff.rs @@ -8,12 +8,12 @@ use async_tiff::tiff::tags::PhotometricInterpretation; use async_tiff::TIFF; use reqwest::Url; -async fn open_remote_tiff(url: &str) -> TIFF { +async fn open_remote_tiff(url: &str, prefetch_bytes: u64) -> TIFF { let parsed_url = Url::parse(url).expect("failed parsing url"); let (store, path) = object_store::parse_url(&parsed_url).unwrap(); let reader = Arc::new(ObjectReader::new(Arc::new(store), path)) as Arc; - let prefetch_reader = PrefetchBuffer::new(reader.clone(), 32 * 1024) + let prefetch_reader = PrefetchBuffer::new(reader.clone(), prefetch_bytes) .await .unwrap(); let mut metadata_reader = TiffMetadataReader::try_open(&prefetch_reader) @@ -28,17 +28,44 @@ async fn open_remote_tiff(url: &str) -> TIFF { #[tokio::test] async fn test_ome_tiff_single_channel() { - let tiff = 
open_remote_tiff("https://downloads.openmicroscopy.org/images/OME-TIFF/2016-06/bioformats-artificial/single-channel.ome.tif").await; + let tiff = open_remote_tiff( + "https://cildata.crbs.ucsd.edu/media/images/40613/40613.tif", + 32 * 128 * 1024, + ) + .await; - assert_eq!(tiff.ifds().len(), 1); + assert_eq!(tiff.ifds().len(), 3); let ifd = &tiff.ifds()[0]; assert_eq!( ifd.photometric_interpretation(), PhotometricInterpretation::BlackIsZero ); - assert_eq!(ifd.image_description(), Some("urn:uuid:2bc2aa39-30d2-44ee-8399-c513492dd5de")); + assert_eq!( + ifd.image_description(), + Some( + r##" + + + + +2012-03-25 21:26:29.0 + + + + + + + +3af39f55-0ac0-431a-bc60-8f9c3e782b85 + + + + +"## + ) + ); assert!(ifd.bits_per_sample().iter().all(|x| *x == 8)); + assert_eq!(ifd.software(), Some("LOCI Bio-Formats")); }