Skip to content

Commit 627618a

Browse files
committed
Add BigTiff support
1 parent 7445924 commit 627618a

File tree

7 files changed

+231
-76
lines changed

7 files changed

+231
-76
lines changed

src/reader.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ impl Reader {
121121
entries: parser.entries,
122122
entry_map,
123123
little_endian: parser.little_endian,
124+
bigtiff: parser.bigtiff,
124125
};
125126
match parser.continue_on_error {
126127
Some(v) if !v.is_empty() =>
@@ -194,6 +195,8 @@ pub struct Exif {
194195
entry_map: HashMap<(In, Tag), usize>,
195196
// True if the TIFF data is little endian.
196197
little_endian: bool,
198+
/// True if the TIFF data is in the BigTIFF format.
199+
bigtiff: bool,
197200
}
198201

199202
impl Exif {
@@ -207,7 +210,9 @@ impl Exif {
207210
#[inline]
208211
pub fn fields(&self) -> impl ExactSizeIterator<Item = &Field> {
209212
self.entries.iter()
210-
.map(move |e| e.ref_field(&self.buf, self.little_endian))
213+
.map(move |e| {
214+
e.ref_field_(&self.buf, self.little_endian)
215+
})
211216
}
212217

213218
/// Returns true if the Exif data (TIFF structure) is in the
@@ -217,12 +222,18 @@ impl Exif {
217222
self.little_endian
218223
}
219224

225+
/// Returns true if the Exif data (TIFF structure) is in BigTIFF format.
226+
#[inline]
227+
pub fn bigtiff(&self) -> bool {
228+
self.bigtiff
229+
}
230+
220231
/// Returns a reference to the Exif field specified by the tag
221232
/// and the IFD number.
222233
#[inline]
223234
pub fn get_field(&self, tag: Tag, ifd_num: In) -> Option<&Field> {
224235
self.entry_map.get(&(ifd_num, tag))
225-
.map(|&i| self.entries[i].ref_field(&self.buf, self.little_endian))
236+
.map(|&i| self.entries[i].ref_field_(&self.buf, self.little_endian))
226237
}
227238
}
228239

src/tag.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,9 +1442,14 @@ fn d_default(w: &mut dyn fmt::Write, value: &Value) -> fmt::Result {
14421442
Value::SRational(ref v) => d_sub_comma(w, v),
14431443
Value::Float(ref v) => d_sub_comma(w, v),
14441444
Value::Double(ref v) => d_sub_comma(w, v),
1445+
Value::Long8(ref v) => d_sub_comma(w, v),
1446+
Value::SLong8(ref v) => d_sub_comma(w, v),
14451447
Value::Unknown(t, c, o) =>
14461448
write!(w, "unknown value (type={}, count={}, offset={:#x})",
14471449
t, c, o),
1450+
Value::UnknownBigTiff(t, c, o) =>
1451+
write!(w, "unknown value (type={}, count={}, offset={:#x})",
1452+
t, c, o),
14481453
}
14491454
}
14501455

src/tiff.rs

Lines changed: 121 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,11 @@ use crate::value::get_type_info;
3939
const TIFF_BE: u16 = 0x4d4d;
4040
const TIFF_LE: u16 = 0x4949;
4141
const TIFF_FORTY_TWO: u16 = 0x002a;
42+
const TIFF_FORTY_THREE: u16 = 0x002b;
4243
pub const TIFF_BE_SIG: [u8; 4] = [0x4d, 0x4d, 0x00, 0x2a];
4344
pub const TIFF_LE_SIG: [u8; 4] = [0x49, 0x49, 0x2a, 0x00];
45+
pub const BIGTIFF_BE_SIG: [u8; 4] = [0x4d, 0x4d, 0x00, 0x2b];
46+
pub const BIGTIFF_LE_SIG: [u8; 4] = [0x49, 0x49, 0x2b, 0x00];
4447

4548
// Partially parsed TIFF field (IFD entry).
4649
// Value::Unknown is abused to represent a partially parsed value.
@@ -63,34 +66,48 @@ impl IfdEntry {
6366
}
6467
}
6568

66-
pub fn ref_field<'a>(&'a self, data: &[u8], le: bool) -> &'a Field {
67-
self.parse(data, le);
69+
/// Obtain a reference to the field.
70+
/// Only supports regular TIFF.
71+
pub fn ref_field_<'a>(&'a self, data: &[u8], le: bool) -> &'a Field {
72+
self.ref_field_any(data, le, false)
73+
}
74+
75+
/// Obtain a reference to the field.
76+
/// Supports both regular TIFF and BigTIFF; `bigtiff` selects the format.
77+
pub fn ref_field_any<'a>(&'a self, data: &[u8], le: bool, bigtiff: bool) -> &'a Field {
78+
self.parse(data, le, bigtiff);
6879
self.field.get_ref()
6980
}
7081

71-
fn into_field(self, data: &[u8], le: bool) -> Field {
72-
self.parse(data, le);
82+
fn into_field(self, data: &[u8], le: bool, bigtiff: bool) -> Field {
83+
self.parse(data, le, bigtiff);
7384
self.field.into_inner()
7485
}
7586

76-
fn parse(&self, data: &[u8], le: bool) {
87+
fn parse(&self, data: &[u8], le: bool, bigtiff: bool) {
7788
if !self.field.is_fixed() {
7889
let mut field = self.field.get_mut();
7990
if le {
80-
Self::parse_value::<LittleEndian>(&mut field.value, data);
91+
Self::parse_value::<LittleEndian>(&mut field.value, data, bigtiff);
8192
} else {
82-
Self::parse_value::<BigEndian>(&mut field.value, data);
93+
Self::parse_value::<BigEndian>(&mut field.value, data, bigtiff);
8394
}
8495
}
8596
}
8697

8798
// Converts a partially parsed value into a real one.
88-
fn parse_value<E>(value: &mut Value, data: &[u8]) where E: Endian {
99+
fn parse_value<E>(value: &mut Value, data: &[u8], bigtiff: bool) where E: Endian {
89100
match *value {
90101
Value::Unknown(typ, cnt, ofs) => {
91102
let (unitlen, parser) = get_type_info::<E>(typ);
92103
if unitlen != 0 {
93-
*value = parser(data, ofs as usize, cnt as usize);
104+
*value = parser(data, ofs as usize, cnt as usize, bigtiff);
105+
}
106+
},
107+
Value::UnknownBigTiff(typ, cnt, ofs) => {
108+
let (unitlen, parser) = get_type_info::<E>(typ);
109+
if unitlen != 0 {
110+
*value = parser(data, ofs as usize, cnt as usize, bigtiff);
94111
}
95112
},
96113
_ => panic!("value is already parsed"),
@@ -153,14 +170,15 @@ impl fmt::Display for In {
153170
pub fn parse_exif(data: &[u8]) -> Result<(Vec<Field>, bool), Error> {
154171
let mut parser = Parser::new();
155172
parser.parse(data)?;
156-
let (entries, le) = (parser.entries, parser.little_endian);
157-
Ok((entries.into_iter().map(|e| e.into_field(data, le)).collect(), le))
173+
let (entries, le, bigtiff) = (parser.entries, parser.little_endian, parser.bigtiff);
174+
Ok((entries.into_iter().map(|e| e.into_field(data, le, bigtiff)).collect(), le))
158175
}
159176

160-
#[derive(Debug)]
177+
#[derive(Debug, Default)]
161178
pub struct Parser {
162179
pub entries: Vec<IfdEntry>,
163180
pub little_endian: bool,
181+
pub bigtiff: bool,
164182
// `Some<Vec>` to enable the option and `None` to disable it.
165183
pub continue_on_error: Option<Vec<Error>>,
166184
}
@@ -170,6 +188,7 @@ impl Parser {
170188
Self {
171189
entries: Vec::new(),
172190
little_endian: false,
191+
bigtiff: false,
173192
continue_on_error: None,
174193
}
175194
}
@@ -194,11 +213,31 @@ impl Parser {
194213

195214
fn parse_header<E>(&mut self, data: &[u8])
196215
-> Result<(), Error> where E: Endian {
197-
// Parse the rest of the header (42 and the IFD offset).
198-
if E::loadu16(data, 2) != TIFF_FORTY_TWO {
199-
return Err(Error::InvalidFormat("Invalid forty two"));
200-
}
201-
let ifd_offset = E::loadu32(data, 4) as usize;
216+
// Parse the rest of the header:
217+
// - 42 and the IFD offset for regular TIFF.
218+
// - 43, the offset byte size (8), a zero constant, and the u64 IFD offset for BigTIFF.
219+
let tiff_type = E::loadu16(data, 2);
220+
self.bigtiff = if tiff_type == TIFF_FORTY_TWO {
221+
false
222+
} else if tiff_type == TIFF_FORTY_THREE {
223+
// http://bigtiff.org/ describes the BigTIFF header additions as constants 8 and 0.
224+
let offset_bytesize = E::loadu16(data, 4);
225+
if offset_bytesize != 8 {
226+
return Err(Error::InvalidFormat("Invalid BigTIFF offset byte size"));
227+
}
228+
let extra_field = E::loadu16(data, 6);
229+
if extra_field != 0 {
230+
return Err(Error::InvalidFormat("Invalid BigTIFF header"));
231+
}
232+
true
233+
} else {
234+
return Err(Error::InvalidFormat("Invalid TIFF magic number 42 or 43"));
235+
};
236+
let ifd_offset = if self.bigtiff {
237+
E::loadu64(data, 8) as usize
238+
} else {
239+
E::loadu32(data, 4) as usize
240+
};
202241
self.parse_body::<E>(data, ifd_offset)
203242
.or_else(|e| self.check_error(e))
204243
}
@@ -226,19 +265,31 @@ impl Parser {
226265
mut offset: usize, ctx: Context, ifd_num: u16)
227266
-> Result<usize, Error> where E: Endian {
228267
// Count (the number of the entries).
229-
if data.len() < offset || data.len() - offset < 2 {
268+
if !self.bigtiff && (data.len() < offset || data.len() - offset < 2) {
230269
return Err(Error::InvalidFormat("Truncated IFD count"));
231270
}
232-
let count = E::loadu16(data, offset) as usize;
233-
offset += 2;
271+
let count = if self.bigtiff {
272+
E::loadu64(data, offset) as usize
273+
} else {
274+
E::loadu16(data, offset) as usize
275+
};
276+
if self.bigtiff {
277+
offset += 8;
278+
} else {
279+
offset += 2;
280+
}
234281

235282
// Array of entries.
236283
for _ in 0..count {
237-
if data.len() - offset < 12 {
284+
if !self.bigtiff && data.len() - offset < 12 { // fixme
238285
return Err(Error::InvalidFormat("Truncated IFD"));
239286
}
240-
let entry = Self::parse_ifd_entry::<E>(data, offset);
241-
offset += 12;
287+
let entry = self.parse_ifd_entry::<E>(data, offset);
288+
if self.bigtiff {
289+
offset += 20;
290+
} else {
291+
offset += 12;
292+
}
242293
let (tag, value) = match entry {
243294
Ok(x) => x,
244295
Err(e) => {
@@ -265,31 +316,53 @@ impl Parser {
265316
}
266317

267318
// Offset to the next IFD.
268-
if data.len() - offset < 4 {
319+
if !self.bigtiff && data.len() - offset < 4 { // fixme
269320
return Err(Error::InvalidFormat("Truncated next IFD offset"));
270321
}
271-
let next_ifd_offset = E::loadu32(data, offset);
272-
Ok(next_ifd_offset as usize)
322+
let next_ifd_offset = if self.bigtiff {
323+
E::loadu64(data, offset) as usize
324+
} else {
325+
E::loadu32(data, offset) as usize
326+
};
327+
Ok(next_ifd_offset)
273328
}
274329

275-
fn parse_ifd_entry<E>(data: &[u8], offset: usize)
330+
fn parse_ifd_entry<E>(&self, data: &[u8], offset: usize)
276331
-> Result<(u16, Value), Error> where E: Endian {
277332
// The size of the entry has been checked in parse_ifd() for regular TIFF only; the check is skipped for BigTIFF (fixme).
278333
let tag = E::loadu16(data, offset);
279334
let typ = E::loadu16(data, offset + 2);
280-
let cnt = E::loadu32(data, offset + 4);
281-
let valofs_at = offset + 8;
335+
336+
let (cnt, valofs_at) = if self.bigtiff {
337+
(E::loadu64(data, offset + 4) as usize, offset + 12)
338+
} else {
339+
(E::loadu32(data, offset + 4) as usize, offset + 8)
340+
};
341+
282342
let (unitlen, _parser) = get_type_info::<E>(typ);
283-
let vallen = unitlen.checked_mul(cnt as usize).ok_or(
343+
let vallen = unitlen.checked_mul(cnt).ok_or(
284344
Error::InvalidFormat("Invalid entry count"))?;
285-
let val = if vallen <= 4 {
286-
Value::Unknown(typ, cnt, valofs_at as u32)
345+
let max_inline_len = if self.bigtiff { 8 } else { 4 };
346+
let val = if vallen <= max_inline_len {
347+
if self.bigtiff {
348+
Value::UnknownBigTiff(typ, cnt as u64, valofs_at as u64)
349+
} else {
350+
Value::Unknown(typ, cnt as u32, valofs_at as u32)
351+
}
287352
} else {
288-
let ofs = E::loadu32(data, valofs_at) as usize;
289-
if data.len() < ofs || data.len() - ofs < vallen {
353+
let ofs = if self.bigtiff {
354+
E::loadu64(data, valofs_at) as usize
355+
} else {
356+
E::loadu32(data, valofs_at) as usize
357+
};
358+
if !self.bigtiff && (data.len() < ofs || data.len() - ofs < vallen) { // fixme
290359
return Err(Error::InvalidFormat("Truncated field value"));
291360
}
292-
Value::Unknown(typ, cnt, ofs as u32)
361+
if self.bigtiff {
362+
Value::UnknownBigTiff(typ, cnt as u64, ofs as u64)
363+
} else {
364+
Value::Unknown(typ, cnt as u32, ofs as u32)
365+
}
293366
};
294367
Ok((tag, val))
295368
}
@@ -298,7 +371,7 @@ impl Parser {
298371
mut pointer: Value, ctx: Context, ifd_num: u16)
299372
-> Result<(), Error> where E: Endian {
300373
// The pointer is not yet parsed, so do it here.
301-
IfdEntry::parse_value::<E>(&mut pointer, data);
374+
IfdEntry::parse_value::<E>(&mut pointer, data, self.bigtiff);
302375

303376
// A pointer field has type == LONG and count == 1, so the
304377
// value (IFD offset) must be embedded in the "value offset"
@@ -323,7 +396,9 @@ impl Parser {
323396
}
324397

325398
pub fn is_tiff(buf: &[u8]) -> bool {
326-
buf.starts_with(&TIFF_BE_SIG) || buf.starts_with(&TIFF_LE_SIG)
399+
buf.starts_with(&TIFF_BE_SIG) || buf.starts_with(&TIFF_LE_SIG) ||
400+
buf.starts_with(&BIGTIFF_BE_SIG) || buf.starts_with(&BIGTIFF_LE_SIG)
401+
327402
}
328403

329404
/// A struct used to parse a DateTime field.
@@ -633,29 +708,30 @@ mod tests {
633708
fn parse_ifd_entry() {
634709
// BYTE (type == 1)
635710
let data = b"\x02\x03\x00\x01\0\0\0\x04ABCD";
636-
assert_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 0).unwrap(),
711+
let parser = Parser::default();
712+
assert_pat!(parser.parse_ifd_entry::<BigEndian>(data, 0).unwrap(),
637713
(0x0203, Value::Unknown(1, 4, 8)));
638714
let data = b"\x02\x03\x00\x01\0\0\0\x05\0\0\0\x0cABCDE";
639-
assert_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 0).unwrap(),
715+
assert_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 0).unwrap(),
640716
(0x0203, Value::Unknown(1, 5, 12)));
641717
let data = b"\x02\x03\x00\x01\0\0\0\x05\0\0\0\x0cABCD";
642-
assert_err_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 0),
718+
assert_err_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 0),
643719
Error::InvalidFormat("Truncated field value"));
644720

645721
// SHORT (type == 3)
646722
let data = b"X\x04\x05\x00\x03\0\0\0\x02ABCD";
647-
assert_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
723+
assert_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
648724
(0x0405, Value::Unknown(3, 2, 9)));
649725
let data = b"X\x04\x05\x00\x03\0\0\0\x03\0\0\0\x0eXABCDEF";
650-
assert_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
726+
assert_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
651727
(0x0405, Value::Unknown(3, 3, 14)));
652728
let data = b"X\x04\x05\x00\x03\0\0\0\x03\0\0\0\x0eXABCDE";
653-
assert_err_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 1),
729+
assert_err_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 1),
654730
Error::InvalidFormat("Truncated field value"));
655731

656732
// Really unknown
657733
let data = b"X\x01\x02\x03\x04\x05\x06\x07\x08ABCD";
658-
assert_pat!(Parser::parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
734+
assert_pat!(Parser::default().parse_ifd_entry::<BigEndian>(data, 1).unwrap(),
659735
(0x0102, Value::Unknown(0x0304, 0x05060708, 9)));
660736
}
661737

@@ -798,7 +874,7 @@ mod tests {
798874
let mut entries = parser.entries.iter();
799875
$(
800876
assert_pat!(entries.next().unwrap()
801-
.ref_field(data, parser.little_endian),
877+
.ref_field_(data, parser.little_endian),
802878
$fields);
803879
)*
804880
assert_pat!(entries.next(), None);

0 commit comments

Comments
 (0)