Skip to content

Commit 1d74e3b

Browse files
committed
Made the IFD prefetch its data before processing; read_tag now operates on an EndianAwareReader and an Arc<dyn AsyncFileReader>.
1 parent 1d0bcd7 commit 1d74e3b

File tree

2 files changed

+68
-47
lines changed

2 files changed

+68
-47
lines changed

src/ifd.rs

Lines changed: 54 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
use std::collections::HashMap;
22
use std::io::Read;
33
use std::ops::Range;
4+
use std::sync::Arc;
45

56
use bytes::{buf::Buf, Bytes};
67
use num_enum::TryFromPrimitive;
@@ -195,26 +196,24 @@ impl ImageFileDirectory {
195196
} else {
196197
cursor.read_u16().await?.into()
197198
};
198-
let mut tags = HashMap::with_capacity(tag_count as usize);
199-
for _ in 0..tag_count {
200-
let (tag_name, tag_value) = read_tag(cursor, bigtiff).await?;
201-
tags.insert(tag_name, tag_value);
202-
}
203-
204199
// Tag 2 bytes
205200
// Type 2 bytes
206201
// Count:
207202
// - bigtiff: 8 bytes
208203
// - else: 4 bytes
209204
// Value:
210-
// - bigtiff: 8 bytes either a pointer the value itself
211-
// - else: 4 bytes either a pointer the value itself
205+
// - bigtiff: 8 bytes either a pointer or the value itself
206+
// - else: 4 bytes either a pointer or the value itself
212207
let ifd_entry_byte_size = if bigtiff { 20 } else { 12 };
213-
// The size of `tag_count` that we read above
214-
let tag_count_byte_size = if bigtiff { 8 } else { 2 };
215208

216-
// Reset the cursor position before reading the next ifd offset
217-
cursor.seek(ifd_start + (ifd_entry_byte_size * tag_count) + tag_count_byte_size);
209+
// read all tag data into an EndianAwareReader
210+
let mut reader = cursor.read(ifd_entry_byte_size * tag_count).await?;
211+
212+
let mut tags = HashMap::with_capacity(tag_count as usize);
213+
for _ in 0..tag_count {
214+
let (tag_name, tag_value) = read_tag(&mut reader, cursor.reader(), bigtiff).await?;
215+
tags.insert(tag_name, tag_value);
216+
}
218217

219218
let next_ifd_offset = if bigtiff {
220219
cursor.read_u64().await?
@@ -838,22 +837,57 @@ impl ImageFileDirectory {
838837
}
839838

840839
/// Read a single tag from the cursor
841-
async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> {
840+
async fn read_tag(
841+
cursor: &mut EndianAwareReader,
842+
file_reader: &Arc<dyn AsyncFileReader>,
843+
bigtiff: bool,
844+
) -> AsyncTiffResult<(Tag, Value)> {
842845
// let start_cursor_position = cursor.position();
843846

844-
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?);
847+
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16()?);
845848

846-
let tag_type_code = cursor.read_u16().await?;
849+
let tag_type_code = cursor.read_u16()?;
847850
let tag_type = Type::from_u16(tag_type_code).expect(
848851
"Unknown tag type {tag_type_code}. TODO: we should skip entries with unknown tag types.",
849852
);
850853
let count = if bigtiff {
851-
cursor.read_u64().await?
854+
cursor.read_u64()?
852855
} else {
853-
cursor.read_u32().await?.into()
856+
cursor.read_u32()?.into()
854857
};
855858

856-
let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?;
859+
let tag_size = tag_type.size();
860+
861+
let value_byte_length = count.checked_mul(tag_size).unwrap();
862+
863+
// prefetch all tag data
864+
let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 {
865+
// value fits in offset field
866+
let mut res = vec![0u8; value_byte_length as usize];
867+
cursor.read_exact(&mut res)?;
868+
if bigtiff {
869+
cursor.advance(8 - value_byte_length)?;
870+
} else {
871+
cursor.advance(4 - value_byte_length)?;
872+
}
873+
EndianAwareReader::new(Bytes::from_owner(res).reader(), cursor.endianness())
874+
} else {
875+
// fetch using file_reader
876+
let offset = if bigtiff {
877+
cursor.read_u64()?
878+
} else {
879+
cursor.read_u32()?.into()
880+
};
881+
let reader = file_reader
882+
.get_bytes(offset..offset + value_byte_length)
883+
.await?
884+
.reader();
885+
EndianAwareReader::new(reader, cursor.endianness())
886+
// cursor.seek(offset);
887+
// cursor.read(value_byte_length).await?
888+
};
889+
890+
let tag_value = read_tag_value(&mut data, tag_type, count)?;
857891

858892
// TODO: better handle management of cursor state <- should be done now
859893
// let ifd_entry_size = if bigtiff { 20 } else { 12 };
@@ -867,43 +901,16 @@ async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(T
867901
/// NOTE: this does not maintain cursor state
868902
// This is derived from the upstream tiff crate:
869903
// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639
870-
async fn read_tag_value(
871-
cursor: &mut AsyncCursor,
904+
fn read_tag_value(
905+
data: &mut EndianAwareReader,
872906
tag_type: Type,
873907
count: u64,
874-
bigtiff: bool,
875908
) -> AsyncTiffResult<Value> {
876909
// Case 1: there are no values so we can return immediately.
877910
if count == 0 {
878911
return Ok(Value::List(vec![]));
879912
}
880913

881-
let tag_size = tag_type.size();
882-
883-
let value_byte_length = count.checked_mul(tag_size).unwrap();
884-
885-
// prefetch all tag data
886-
let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 {
887-
// value fits in offset field
888-
let res = cursor.read(value_byte_length).await?;
889-
if bigtiff {
890-
cursor.advance(8-value_byte_length);
891-
} else {
892-
cursor.advance(4-value_byte_length);
893-
}
894-
res
895-
} else {
896-
// Seek cursor
897-
let offset = if bigtiff {
898-
cursor.read_u64().await?
899-
} else {
900-
cursor.read_u32().await?.into()
901-
};
902-
let reader = cursor.reader().get_bytes(offset..offset+value_byte_length).await?.reader();
903-
EndianAwareReader::new(reader, cursor.endianness())
904-
// cursor.seek(offset);
905-
// cursor.read(value_byte_length).await?
906-
};
907914
// Case 2: there is one value.
908915
if count == 1 {
909916
return Ok(match tag_type {

src/reader.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,20 @@ impl EndianAwareReader {
352352
pub(crate) fn new(reader: Reader<Bytes>, endianness: Endianness) -> Self {
353353
Self { reader, endianness }
354354
}
355+
356+
pub(crate) fn endianness(&self) -> Endianness {
357+
self.endianness
358+
}
359+
360+
pub(crate) fn advance(&mut self, amt: u64) -> AsyncTiffResult<u64> {
361+
// TODO: can we use consume?
362+
// from https://stackoverflow.com/a/42247224
363+
Ok(std::io::copy(
364+
&mut self.reader.by_ref().take(amt),
365+
&mut std::io::sink(),
366+
)?)
367+
}
368+
355369
/// Read a u8 from the cursor, advancing the internal state by 1 byte.
356370
pub(crate) fn read_u8(&mut self) -> AsyncTiffResult<u8> {
357371
Ok(self.reader.read_u8()?)

0 commit comments

Comments
 (0)