Skip to content

Commit 96b60d0

Browse files
committed
feat: Add support for more tags from Canon CR3 files
Remove MultiExifIter, since it seems preferable to support CR3 files through the same API as everything else rather than forcing applications to know which files might need a different API. Instead, refactor ExifIter to handle discontiguous regions of a file that contain EXIF data. Use this to handle CR3 files, which spread EXIF data across multiple boxes (CMT1, CMT2, CMT3). Update the CR3 test case to check that the correct information is returned for the test file.
1 parent 99c891e commit 96b60d0

File tree

7 files changed

+308
-619
lines changed

7 files changed

+308
-619
lines changed

src/bbox/cr3_moov.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,24 @@ impl Cr3MoovBox {
142142
// For CR3, we primarily use CMT1 which contains the main EXIF IFD0 data
143143
self.uuid_canon_box.as_ref()?.exif_data_offset().cloned()
144144
}
145+
146+
/// Returns offset ranges for all CMT boxes (CMT1, CMT2, CMT3).
147+
/// CMT1 is the primary EXIF data, CMT2 is ExifIFD data, CMT3 is MakerNotes.
148+
pub fn all_cmt_data_offsets(&self) -> Vec<(&'static str, Range<usize>)> {
149+
let Some(uuid_box) = self.uuid_canon_box.as_ref() else {
150+
return Vec::new();
151+
};
152+
153+
let mut offsets = Vec::with_capacity(3);
154+
if let Some(range) = uuid_box.exif_data_offset() {
155+
offsets.push(("CMT1", range.clone()));
156+
}
157+
if let Some(range) = uuid_box.cmt2_data_offset() {
158+
offsets.push(("CMT2", range.clone()));
159+
}
160+
if let Some(range) = uuid_box.cmt3_data_offset() {
161+
offsets.push(("CMT3", range.clone()));
162+
}
163+
offsets
164+
}
145165
}

src/cr3.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::ops::Range;
2+
13
use nom::IResult;
24

35
use crate::{
@@ -11,6 +13,54 @@ pub(crate) fn parse_moov_box(input: &[u8]) -> IResult<&[u8], Option<Cr3MoovBox>>
1113
Cr3MoovBox::parse(input)
1214
}
1315

16+
/// Result containing all CMT ranges for CR3 files.
17+
/// Each tuple contains (block_id, data_range).
18+
#[derive(Debug, Clone)]
19+
pub(crate) struct Cr3CmtRanges {
20+
/// All CMT ranges: (block_id, range)
21+
pub ranges: Vec<(&'static str, Range<usize>)>,
22+
}
23+
24+
/// Extract all CMT data ranges from a CR3 file buffer.
25+
/// Returns all CMT ranges if a moov box is found (`Ok(None)` otherwise); the moov box itself is not returned.
26+
pub(crate) fn extract_all_cmt_ranges(
27+
buf: &[u8],
28+
) -> Result<Option<Cr3CmtRanges>, ParsingErrorState> {
29+
let (_, moov) =
30+
parse_moov_box(buf).map_err(|e| nom_error_to_parsing_error_with_state(e, None))?;
31+
32+
let Some(moov) = moov else {
33+
return Ok(None);
34+
};
35+
36+
let ranges = moov.all_cmt_data_offsets();
37+
if ranges.is_empty() {
38+
return Err(ParsingErrorState::new(
39+
ParsingError::Failed(
40+
"CR3 file contains no EXIF data: Canon UUID box found but no CMT offsets available"
41+
.into(),
42+
),
43+
None,
44+
));
45+
}
46+
47+
// Check all ranges against the buffer bounds; out-of-bounds ranges are only logged, not rejected
48+
for (block_id, range) in &ranges {
49+
if range.end > buf.len() {
50+
// For now, we'll skip validation and let it fail later if needed
51+
// This matches the behavior of the original extract_exif_data
52+
tracing::warn!(
53+
block_id,
54+
range_end = range.end,
55+
buf_len = buf.len(),
56+
"CMT range extends beyond buffer"
57+
);
58+
}
59+
}
60+
61+
Ok(Some(Cr3CmtRanges { ranges }))
62+
}
63+
1464
pub(crate) fn extract_exif_data(
1565
state: Option<ParsingState>,
1666
buf: &[u8],

src/exif.rs

Lines changed: 64 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ use exif_exif::TIFF_HEADER_LEN;
1212
use exif_iter::input_into_iter;
1313
pub use exif_iter::{ExifIter, ParsedExifEntry};
1414
pub use gps::{GPSInfo, LatLng};
15-
pub use multi_exif::{DuplicateStrategy, MultiExifIter};
1615
pub use tags::ExifTag;
1716

1817
use std::io::Read;
@@ -25,7 +24,6 @@ pub(crate) use travel::IfdHeaderTravel;
2524
mod exif_exif;
2625
mod exif_iter;
2726
mod gps;
28-
mod multi_exif;
2927
mod tags;
3028
mod travel;
3129

@@ -74,44 +72,85 @@ pub(crate) fn parse_exif_iter<R: Read, S: Skip<R>>(
7472
mime_img: MimeImage,
7573
reader: &mut R,
7674
) -> Result<ExifIter, crate::Error> {
75+
// For CR3 files, we need special handling to get all CMT blocks
76+
if mime_img == MimeImage::Cr3 {
77+
return parse_cr3_exif_iter::<R, S>(parser, reader);
78+
}
79+
7780
let out = parser.load_and_parse::<R, S, _, _>(reader, |buf, state| {
7881
extract_exif_range(mime_img, buf, state)
7982
})?;
8083

8184
range_to_iter(parser, out)
8285
}
8386

87+
/// Special parser for CR3 files that extracts all CMT blocks (CMT1, CMT2, CMT3)
88+
/// and adds them as additional TIFF blocks to the ExifIter.
8489
#[tracing::instrument(skip(reader))]
85-
pub(crate) fn parse_multi_exif_iter<R: Read, S: Skip<R>>(
90+
fn parse_cr3_exif_iter<R: Read, S: Skip<R>>(
8691
parser: &mut MediaParser,
87-
mime_img: MimeImage,
8892
reader: &mut R,
89-
) -> Result<MultiExifIter, crate::Error> {
90-
if mime_img != MimeImage::Cr3 {
91-
return Err(format!("MultiExifIter is not supported for {mime_img:?}").into());
92-
}
93+
) -> Result<ExifIter, crate::Error> {
94+
use crate::parser::Buf;
9395

94-
let mut iter = MultiExifIter::new(DuplicateStrategy::IgnoreDuplicates);
96+
// First, parse to get all CMT ranges
97+
let cmt_ranges = parser.load_and_parse::<R, S, _, _>(reader, |buf, _state| {
98+
cr3::extract_all_cmt_ranges(buf)
99+
})?;
95100

96-
// TODO: The following is only demonstration code.
97-
// Please make further modifications based on the CR3 file structure.
98-
// For example, the `parse` callback of `load_and_parse` should be reimplemented
99-
// to correctly parse the next CMT* box.
101+
let Some(cmt_ranges) = cmt_ranges else {
102+
return Err("CR3: No CMT data found".into());
103+
};
100104

101-
loop {
102-
let out = parser.load_and_parse::<R, S, _, _>(reader, |buf, state| {
103-
extract_exif_range(mime_img, buf, state)
104-
})?;
105-
if out.is_none() {
106-
break;
105+
if cmt_ranges.ranges.is_empty() {
106+
return Err("CR3: No CMT ranges available".into());
107+
}
108+
109+
tracing::debug!(
110+
cmt_count = cmt_ranges.ranges.len(),
111+
"Found CMT ranges in CR3 file"
112+
);
113+
114+
// Get the parser position offset - share_buf will add this to ranges
115+
let position_offset = parser.position();
116+
117+
// Get the first CMT range (CMT1) to create the primary ExifIter
118+
let (first_block_id, first_range) = &cmt_ranges.ranges[0];
119+
tracing::debug!(
120+
block_id = first_block_id,
121+
range = ?first_range,
122+
position_offset,
123+
"Creating primary ExifIter from first CMT block"
124+
);
125+
126+
// Share the buffer and create the primary ExifIter
127+
// Note: share_buf adds position_offset to the range internally
128+
let input: PartialVec = parser.share_buf(first_range.clone());
129+
let mut iter = input_into_iter(input, None)?;
130+
131+
// Add remaining CMT blocks as additional TIFF blocks
132+
// We need to adjust the ranges by position_offset since the PartialVec.data
133+
// contains the full buffer and ranges need to be absolute
134+
// Note: We skip CMT3 (MakerNotes) as it has a proprietary format that requires
135+
// special handling and would produce garbage data if parsed as standard EXIF
136+
for (block_id, range) in cmt_ranges.ranges.iter().skip(1) {
137+
// Skip CMT3 (MakerNotes) - it has a proprietary Canon format
138+
if *block_id == "CMT3" {
139+
tracing::debug!(
140+
block_id,
141+
"Skipping CMT3 (MakerNotes) - proprietary format"
142+
);
143+
continue;
107144
}
108145

109-
// TODO: The current `block_id` should be returned via the `load_and_parse` call.
110-
let block_id = "CMT1";
111-
let data = out
112-
.map(|(range, _)| parser.share_buf(range))
113-
.ok_or_else(|| format!("Exif not found in block {block_id}"))?;
114-
iter.add_tiff_data(block_id.to_owned(), data, None);
146+
let adjusted_range = (range.start + position_offset)..(range.end + position_offset);
147+
tracing::debug!(
148+
block_id,
149+
original_range = ?range,
150+
adjusted_range = ?adjusted_range,
151+
"Adding additional CMT block"
152+
);
153+
iter.add_tiff_block(block_id.to_string(), adjusted_range, None);
115154
}
116155

117156
Ok(iter)

0 commit comments

Comments
 (0)