Skip to content

Commit 5606b62

Browse files
committed
Made the IFD prefetch its tag data before processing; read_tag now operates on an EndianAwareReader and an Arc<dyn AsyncFileReader>
1 parent 72ffc22 commit 5606b62

File tree

2 files changed

+70
-51
lines changed

2 files changed

+70
-51
lines changed

src/ifd.rs

Lines changed: 54 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::collections::HashMap;
22
use std::io::Read;
33
use std::ops::Range;
4+
use std::sync::Arc;
45

56
use bytes::{buf::Buf, Bytes};
67
use num_enum::TryFromPrimitive;
@@ -195,26 +196,24 @@ impl ImageFileDirectory {
195196
} else {
196197
cursor.read_u16().await?.into()
197198
};
198-
let mut tags = HashMap::with_capacity(tag_count as usize);
199-
for _ in 0..tag_count {
200-
let (tag_name, tag_value) = read_tag(cursor, bigtiff).await?;
201-
tags.insert(tag_name, tag_value);
202-
}
203-
204199
// Tag 2 bytes
205200
// Type 2 bytes
206201
// Count:
207202
// - bigtiff: 8 bytes
208203
// - else: 4 bytes
209204
// Value:
210-
// - bigtiff: 8 bytes either a pointer the value itself
211-
// - else: 4 bytes either a pointer the value itself
205+
// - bigtiff: 8 bytes either a pointer or the value itself
206+
// - else: 4 bytes either a pointer or the value itself
212207
let ifd_entry_byte_size = if bigtiff { 20 } else { 12 };
213-
// The size of `tag_count` that we read above
214-
let tag_count_byte_size = if bigtiff { 8 } else { 2 };
215208

216-
// Reset the cursor position before reading the next ifd offset
217-
cursor.seek(ifd_start + (ifd_entry_byte_size * tag_count) + tag_count_byte_size);
209+
// read all tag data into an EndianAwareReader
210+
let mut reader = cursor.read(ifd_entry_byte_size * tag_count).await?;
211+
212+
let mut tags = HashMap::with_capacity(tag_count as usize);
213+
for _ in 0..tag_count {
214+
let (tag_name, tag_value) = read_tag(&mut reader, cursor.reader(), bigtiff).await?;
215+
tags.insert(tag_name, tag_value);
216+
}
218217

219218
let next_ifd_offset = if bigtiff {
220219
cursor.read_u64().await?
@@ -838,76 +837,80 @@ impl ImageFileDirectory {
838837
}
839838

840839
/// Read a single tag from the cursor
841-
async fn read_tag(cursor: &mut AsyncCursor, bigtiff: bool) -> AsyncTiffResult<(Tag, Value)> {
840+
async fn read_tag(
841+
cursor: &mut EndianAwareReader,
842+
file_reader: &Arc<dyn AsyncFileReader>,
843+
bigtiff: bool,
844+
) -> AsyncTiffResult<(Tag, Value)> {
842845
// let start_cursor_position = cursor.position();
843846

844-
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16().await?);
847+
let tag_name = Tag::from_u16_exhaustive(cursor.read_u16()?);
845848

846-
let tag_type_code = cursor.read_u16().await?;
849+
let tag_type_code = cursor.read_u16()?;
847850
let tag_type = Type::from_u16(tag_type_code).expect(
848851
"Unknown tag type {tag_type_code}. TODO: we should skip entries with unknown tag types.",
849852
);
850853
let count = if bigtiff {
851-
cursor.read_u64().await?
854+
cursor.read_u64()?
852855
} else {
853-
cursor.read_u32().await?.into()
856+
cursor.read_u32()?.into()
854857
};
855858

856-
let tag_value = read_tag_value(cursor, tag_type, count, bigtiff).await?;
857-
858-
// TODO: better handle management of cursor state <- should be done now
859-
// let ifd_entry_size = if bigtiff { 20 } else { 12 };
860-
// cursor.seek(start_cursor_position + ifd_entry_size);
861-
862-
Ok((tag_name, tag_value))
863-
}
864-
865-
/// Read a tag's value from the cursor
866-
///
867-
/// NOTE: this does not maintain cursor state
868-
// This is derived from the upstream tiff crate:
869-
// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639
870-
async fn read_tag_value(
871-
cursor: &mut AsyncCursor,
872-
tag_type: Type,
873-
count: u64,
874-
bigtiff: bool,
875-
) -> AsyncTiffResult<Value> {
876-
// Case 1: there are no values so we can return immediately.
877-
if count == 0 {
878-
return Ok(Value::List(vec![]));
879-
}
880-
881859
let tag_size = tag_type.size();
882860

883861
let value_byte_length = count.checked_mul(tag_size).unwrap();
884862

885863
// prefetch all tag data
886864
let mut data = if (bigtiff && value_byte_length <= 8) || value_byte_length <= 4 {
887865
// value fits in offset field
888-
let res = cursor.read(value_byte_length).await?;
866+
let mut res = vec![0u8; value_byte_length as usize];
867+
cursor.read_exact(&mut res)?;
889868
if bigtiff {
890-
cursor.advance(8 - value_byte_length);
869+
cursor.advance(8 - value_byte_length)?;
891870
} else {
892-
cursor.advance(4 - value_byte_length);
871+
cursor.advance(4 - value_byte_length)?;
893872
}
894-
res
873+
EndianAwareReader::new(Bytes::from_owner(res).reader(), cursor.endianness())
895874
} else {
896-
// Seek cursor
875+
// fetch using file_reader
897876
let offset = if bigtiff {
898-
cursor.read_u64().await?
877+
cursor.read_u64()?
899878
} else {
900-
cursor.read_u32().await?.into()
879+
cursor.read_u32()?.into()
901880
};
902-
let reader = cursor
903-
.reader()
881+
let reader = file_reader
904882
.get_bytes(offset..offset + value_byte_length)
905883
.await?
906884
.reader();
907885
EndianAwareReader::new(reader, cursor.endianness())
908886
// cursor.seek(offset);
909887
// cursor.read(value_byte_length).await?
910888
};
889+
890+
let tag_value = read_tag_value(&mut data, tag_type, count)?;
891+
892+
// TODO: better handle management of cursor state <- should be done now
893+
// let ifd_entry_size = if bigtiff { 20 } else { 12 };
894+
// cursor.seek(start_cursor_position + ifd_entry_size);
895+
896+
Ok((tag_name, tag_value))
897+
}
898+
899+
/// Read a tag's value from the prefetched `data` reader
900+
///
901+
/// NOTE: this does not maintain cursor state
902+
// This is derived from the upstream tiff crate:
903+
// https://github.com/image-rs/image-tiff/blob/6dc7a266d30291db1e706c8133357931f9e2a053/src/decoder/ifd.rs#L369-L639
904+
fn read_tag_value(
905+
data: &mut EndianAwareReader,
906+
tag_type: Type,
907+
count: u64,
908+
) -> AsyncTiffResult<Value> {
909+
// Case 1: there are no values so we can return immediately.
910+
if count == 0 {
911+
return Ok(Value::List(vec![]));
912+
}
913+
911914
// Case 2: there is one value.
912915
if count == 1 {
913916
return Ok(match tag_type {

src/reader.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,11 +332,13 @@ impl AsyncCursor {
332332
&self.reader
333333
}
334334

335+
#[allow(dead_code)]
335336
pub(crate) fn endianness(&self) -> Endianness {
336337
self.endianness
337338
}
338339

339340
/// Advance cursor position by a set amount
341+
#[allow(dead_code)]
340342
pub(crate) fn advance(&mut self, amount: u64) {
341343
self.offset += amount;
342344
}
@@ -360,6 +362,20 @@ impl EndianAwareReader {
360362
pub(crate) fn new(reader: Reader<Bytes>, endianness: Endianness) -> Self {
361363
Self { reader, endianness }
362364
}
365+
366+
pub(crate) fn endianness(&self) -> Endianness {
367+
self.endianness
368+
}
369+
370+
pub(crate) fn advance(&mut self, amt: u64) -> AsyncTiffResult<u64> {
371+
// TODO: can we use consume?
372+
// Skips up to `amt` bytes by copying them into a sink; adapted from https://stackoverflow.com/a/42247224
373+
Ok(std::io::copy(
374+
&mut self.reader.by_ref().take(amt),
375+
&mut std::io::sink(),
376+
)?)
377+
}
378+
363379
/// Read a u8 from the cursor, advancing the internal state by 1 byte.
364380
pub(crate) fn read_u8(&mut self) -> AsyncTiffResult<u8> {
365381
Ok(self.reader.read_u8()?)

0 commit comments

Comments
 (0)