Skip to content

Commit 349c582

Browse files
committed
Fix TIFF EXIF extraction when using stock libexif
Refactors the PNG and TIFF loaders to parse EXIF metadata once, keep the parsed Exif object, and assign it to each loaded image. The TIFF loader uses a new extract_tiff_exif_blob function to build a small, self-contained EXIF blob from the first IFD, which allows extracting EXIF from TIFFs without passing the entire file to libexif (which fails with the stock libexif). Introduces write_as and write_array utilities for endian-safe writing, and improves read_array for performance. Adds a deep copy constructor and copy assignment operator to Exif.
1 parent 24c0c10 commit 349c582

File tree

5 files changed

+247
-32
lines changed

5 files changed

+247
-32
lines changed

src/common.h

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,8 @@ T read_as(const unsigned char *ptr, Endian data_endian)
353353
T value;
354354
memcpy(&value, ptr, sizeof(T));
355355

356-
// Swap bytes if data endianness doesn't match machine endianness
357-
bool data_is_big_endian = (data_endian == Endian::Big);
358-
bool need_swap = data_is_big_endian != !is_little_endian();
359-
if (need_swap)
356+
// Only swap bytes if necessary
357+
if (data_endian != host_endian())
360358
value = swap_bytes(value);
361359

362360
return value;
@@ -377,7 +375,59 @@ T read_as(const unsigned char *ptr, Endian data_endian)
377375
template <typename T>
378376
void read_array(T *output, const unsigned char *input, size_t count, Endian data_endian)
379377
{
380-
for (size_t i = 0; i < count; i++) output[i] = read_as<T>(input + i * sizeof(T), data_endian);
378+
// First, copy all bytes at once
379+
memcpy(output, input, count * sizeof(T));
380+
381+
// Only swap bytes if necessary
382+
if (data_endian != host_endian())
383+
for (size_t i = 0; i < count; i++) output[i] = swap_bytes(output[i]);
384+
}
385+
386+
/*!
387+
* @brief Write a value of type T to a byte array with specified endianness.
388+
*
389+
* Writes sizeof(T) bytes to the given pointer, performing byte swapping if the target
390+
* endianness differs from the machine's endianness.
391+
*
392+
* @tparam T The type to write (e.g., float, double, uint32_t)
393+
* @param ptr Pointer to the byte array to write to
394+
* @param value The value to write
395+
* @param target_endian The desired endianness for the data in the byte array
396+
*/
397+
template <typename T>
398+
void write_as(unsigned char *ptr, T value, Endian target_endian)
399+
{
400+
// Swap bytes if target endianness doesn't match machine endianness
401+
if (target_endian != host_endian())
402+
value = swap_bytes(value);
403+
404+
memcpy(ptr, &value, sizeof(T));
405+
}
406+
407+
/*!
408+
* @brief Write an array of values of type T to a byte array with specified endianness.
409+
*
410+
* Writes count * sizeof(T) bytes to the output pointer, performing byte swapping on each
411+
* element if the target endianness differs from the machine's.
412+
*
413+
* @tparam T The type to write (e.g., float, double, uint32_t, int32_t)
414+
* @param output Pointer to the output byte array to write to
415+
* @param input Pointer to the input array of values
416+
* @param count Number of elements to write
417+
* @param target_endian The desired endianness for the data in the output byte array
418+
*/
419+
template <typename T>
420+
void write_array(unsigned char *output, const T *input, size_t count, Endian target_endian)
421+
{
422+
// First, copy all bytes at once
423+
memcpy(output, input, count * sizeof(T));
424+
425+
// Only swap bytes if necessary
426+
if (target_endian != host_endian())
427+
{
428+
T *output_typed = reinterpret_cast<T *>(output);
429+
for (size_t i = 0; i < count; i++) output_typed[i] = swap_bytes(output_typed[i]);
430+
}
381431
}
382432

383433
template <typename T>

src/imageio/exif.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,22 @@ Exif::Exif(const uint8_t *data_ptr, size_t data_size) : m_impl(std::make_unique<
200200
}
201201
}
202202

203-
Exif::Exif(Exif &&other) noexcept = default;
203+
// Move construction uses the compiler-generated member-wise move.
Exif::Exif(Exif &&other) noexcept = default;

// Deep copy: delegate to the (pointer, size) constructor with the source object's raw bytes, so
// the new object is built independently instead of sharing the source's implementation.
Exif::Exif(const Exif &other) : Exif(other.data(), other.size()) {}

// Deep copy assignment: build a fresh temporary from the source's raw bytes, then move-assign
// that temporary into *this.
Exif &Exif::operator=(const Exif &other)
{
    // Self-assignment leaves the object untouched
    if (this == &other)
        return *this;

    *this = Exif(other.data(), other.size());
    return *this;
}

// Move assignment and destruction are compiler-generated.
Exif &Exif::operator=(Exif &&) noexcept = default;
Exif::~Exif()                           = default;
206221

@@ -479,14 +494,14 @@ json Exif::to_json() const
479494
if (!m_impl || !m_impl->exif_data)
480495
throw std::runtime_error{"Failed to parse EXIF data."};
481496

482-
auto ed = m_impl->exif_data.get();
483-
json j;
497+
const auto &ed = m_impl->exif_data.get();
498+
json j;
484499

485500
static const char *ExifIfdTable[] = {"TIFF IFD0", "TIFF IFD1", "EXIF", "GPS", "Interoperability"};
486501

487502
for (int ifd_idx = 0; ifd_idx < EXIF_IFD_COUNT; ++ifd_idx)
488503
{
489-
ExifContent *content = ed->ifd[ifd_idx];
504+
auto content = ed->ifd[ifd_idx];
490505
if (!content || !content->count)
491506
continue;
492507

src/imageio/exif.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ class Exif
1818
public:
1919
Exif(const uint8_t *data_ptr = nullptr, size_t data_size = 0U);
2020
Exif(const std::vector<uint8_t> &data) : Exif(data.data(), data.size()) {}
21+
Exif(const Exif &other); /// Performs a deep copy
2122
Exif(Exif &&other) noexcept;
22-
Exif &operator=(const Exif &);
23+
Exif &operator=(const Exif &); /// Performs a deep copy
2324
Exif &operator=(Exif &&) noexcept;
2425
~Exif();
2526

src/imageio/png.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -599,18 +599,24 @@ vector<ImagePtr> load_png_image(istream &is, string_view filename, const ImageLo
599599
else
600600
metadata["pixel format"] = fmt::format("{}-bit ({} bpc)", channels * file_bit_depth, file_bit_depth);
601601

602-
try
602+
// Parse EXIF metadata using libexif
603+
Exif exif;
604+
if (!exif_data.empty())
603605
{
604-
if (!exif_data.empty())
606+
spdlog::debug("Found EXIF data of size {} bytes", exif_data.size());
607+
608+
try
605609
{
606-
Exif exif{reinterpret_cast<const uint8_t *>(exif_data.data()), exif_data.size()};
607-
metadata["exif"] = exif.to_json();
608-
spdlog::debug("EXIF metadata successfully parsed: {}", metadata["exif"].dump(2));
610+
exif = Exif{reinterpret_cast<const uint8_t *>(exif_data.data()), exif_data.size()};
611+
auto exif_json = exif.to_json();
612+
metadata["exif"] = exif_json;
613+
spdlog::debug("EXIF metadata successfully parsed: {}", exif_json.dump(2));
614+
}
615+
catch (const std::exception &e)
616+
{
617+
spdlog::warn("Exception while parsing EXIF chunk: {}", e.what());
618+
exif.reset();
609619
}
610-
}
611-
catch (const std::exception &e)
612-
{
613-
spdlog::warn("Exception while parsing EXIF chunk: {}", e.what());
614620
}
615621

616622
png_uint_32 num_frames = 0, num_plays = 0;
@@ -669,7 +675,8 @@ vector<ImagePtr> load_png_image(istream &is, string_view filename, const ImageLo
669675
image->alpha_type = size.z == 4 || size.z == 2 ? AlphaType_Straight : AlphaType_None;
670676
image->chromaticities = chr;
671677
image->metadata = metadata;
672-
image->exif = Exif{reinterpret_cast<const uint8_t *>(exif_data.data()), exif_data.size()};
678+
if (exif.valid())
679+
image->exif = exif;
673680

674681
if (animation)
675682
{

src/imageio/tiff.cpp

Lines changed: 154 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,131 @@ vector<ImagePtr> load_image(TIFF *tif, bool reverse_endian, tdir_t dir, int sub_
948948
return images;
949949
}
950950

951+
/// Extract EXIF data from TIFF file as a blob suitable for libexif parsing.
952+
/// Returns a vector with "Exif\0\0" header followed by minimal TIFF structure.
953+
/// Returns empty vector if extraction fails or is not possible.
954+
vector<uint8_t> extract_tiff_exif_blob(const vector<uint8_t> &data, bool reverse_endian)
955+
{
956+
if (data.size() < 8)
957+
return {};
958+
959+
try
960+
{
961+
// Determine TIFF endianness
962+
Endian tiff_endian = reverse_endian ? (is_little_endian() ? Endian::Big : Endian::Little)
963+
: (is_little_endian() ? Endian::Little : Endian::Big);
964+
uint32_t first_ifd_offset = read_as<uint32_t>(&data[4], tiff_endian);
965+
966+
spdlog::debug("First IFD offset: {} (0x{:08X})", first_ifd_offset, first_ifd_offset);
967+
968+
if (first_ifd_offset <= 8 || first_ifd_offset >= data.size())
969+
return {};
970+
971+
// Construct a minimal TIFF blob: header + IFD + trailing data
972+
// We include extra data after the IFD to capture offset-referenced values
973+
const size_t ifd_buffer_size = 256 * 1024; // 256KB should cover most metadata
974+
size_t blob_data_size = std::min(ifd_buffer_size, data.size() - first_ifd_offset);
975+
size_t blob_size = 8 + blob_data_size; // header + IFD data
976+
977+
vector<uint8_t> tiff_blob(blob_size);
978+
979+
// Copy TIFF header (8 bytes)
980+
memcpy(tiff_blob.data(), data.data(), 8);
981+
982+
// Modify the IFD offset to point right after the header (offset 8)
983+
write_as<uint32_t>(tiff_blob.data() + 4, 8, tiff_endian);
984+
985+
// Copy IFD and trailing data
986+
memcpy(tiff_blob.data() + 8, data.data() + first_ifd_offset, blob_data_size);
987+
988+
// Now we need to adjust any offsets in the IFD entries
989+
// IFD structure: 2-byte count, then 12-byte entries
990+
uint16_t num_entries = read_as<uint16_t>(tiff_blob.data() + 8, tiff_endian);
991+
spdlog::debug("IFD has {} entries", num_entries);
992+
993+
// Adjust offsets in IFD entries (subtract the old IFD offset, add 8)
994+
int32_t offset_delta = 8 - (int32_t)first_ifd_offset;
995+
996+
for (uint16_t i = 0; i < num_entries && i < 1000; ++i) // sanity limit
997+
{
998+
size_t entry_offset = 8 + 2 + (i * 12); // header + count + entry
999+
uint16_t tag = read_as<uint16_t>(tiff_blob.data() + entry_offset + 0, tiff_endian);
1000+
uint16_t type = read_as<uint16_t>(tiff_blob.data() + entry_offset + 2, tiff_endian);
1001+
uint32_t count = read_as<uint32_t>(tiff_blob.data() + entry_offset + 4, tiff_endian);
1002+
uint32_t value_offset = read_as<uint32_t>(tiff_blob.data() + entry_offset + 8, tiff_endian);
1003+
1004+
// Determine if this is an offset or inline value
1005+
// Values > 4 bytes are stored as offsets
1006+
size_t type_size = 0;
1007+
switch (type)
1008+
{
1009+
case 1: type_size = 1; break; // BYTE
1010+
case 2: type_size = 1; break; // ASCII
1011+
case 3: type_size = 2; break; // SHORT
1012+
case 4: type_size = 4; break; // LONG
1013+
case 5: type_size = 8; break; // RATIONAL
1014+
case 6: type_size = 1; break; // SBYTE
1015+
case 7: type_size = 1; break; // UNDEFINED
1016+
case 8: type_size = 2; break; // SSHORT
1017+
case 9: type_size = 4; break; // SLONG
1018+
case 10: type_size = 8; break; // SRATIONAL
1019+
case 11: type_size = 4; break; // FLOAT
1020+
case 12: type_size = 8; break; // DOUBLE
1021+
default: type_size = 1; break;
1022+
}
1023+
1024+
size_t value_size = type_size * count;
1025+
1026+
// If value doesn't fit inline (> 4 bytes), it's an offset that needs adjusting
1027+
if (value_size > 4 && value_offset >= first_ifd_offset && value_offset < first_ifd_offset + blob_data_size)
1028+
{
1029+
uint32_t new_offset = value_offset + offset_delta;
1030+
write_as<uint32_t>(tiff_blob.data() + entry_offset + 8, new_offset, tiff_endian);
1031+
spdlog::debug("Adjusted tag 0x{:04X} offset from {} to {}", tag, value_offset, new_offset);
1032+
}
1033+
}
1034+
1035+
// Adjust the "next IFD" offset at the end of the IFD
1036+
size_t next_ifd_offset_pos = 8 + 2 + (num_entries * 12);
1037+
if (next_ifd_offset_pos + 4 <= tiff_blob.size())
1038+
{
1039+
uint32_t next_ifd = read_as<uint32_t>(tiff_blob.data() + next_ifd_offset_pos, tiff_endian);
1040+
if (next_ifd > 0 && next_ifd >= first_ifd_offset && next_ifd < first_ifd_offset + blob_data_size)
1041+
{
1042+
write_as<uint32_t>(tiff_blob.data() + next_ifd_offset_pos, next_ifd + offset_delta, tiff_endian);
1043+
spdlog::debug("Adjusted next IFD offset from {} to {}", next_ifd, next_ifd + offset_delta);
1044+
}
1045+
else if (next_ifd > 0)
1046+
{
1047+
// Next IFD is outside our buffer, set to 0 (no next IFD)
1048+
write_as<uint32_t>(tiff_blob.data() + next_ifd_offset_pos, 0, tiff_endian);
1049+
spdlog::debug("Set next IFD offset to 0 (was {})", next_ifd);
1050+
}
1051+
}
1052+
1053+
// Prepend "Exif\0\0" header if not already present
1054+
if (tiff_blob.size() < 6 || memcmp(tiff_blob.data(), "Exif\0\0", 6) != 0)
1055+
{
1056+
vector<uint8_t> exif_blob(6 + tiff_blob.size());
1057+
exif_blob[0] = 'E';
1058+
exif_blob[1] = 'x';
1059+
exif_blob[2] = 'i';
1060+
exif_blob[3] = 'f';
1061+
exif_blob[4] = 0;
1062+
exif_blob[5] = 0;
1063+
memcpy(exif_blob.data() + 6, tiff_blob.data(), tiff_blob.size());
1064+
return exif_blob;
1065+
}
1066+
1067+
return tiff_blob;
1068+
}
1069+
catch (const std::exception &e)
1070+
{
1071+
spdlog::debug("Failed to extract EXIF blob: {}", e.what());
1072+
return {};
1073+
}
1074+
}
1075+
9511076
vector<ImagePtr> load_sub_images(TIFF *tif, bool reverse_endian, tdir_t dir, const ImageLoadOptions &opts)
9521077
{
9531078
vector<ImagePtr> images;
@@ -1040,17 +1165,28 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10401165

10411166
auto tif_guard = ScopeGuard{[tif] { TIFFClose(tif); }};
10421167

1043-
// Extract EXIF data once for all images/sub-images
1044-
json exif_json;
1045-
try
1046-
{
1047-
exif_json = exif_to_json(data.data(), data.size());
1048-
if (!exif_json.empty())
1049-
spdlog::debug("EXIF metadata successfully parsed");
1050-
}
1051-
catch (const std::exception &e)
1168+
// Extract EXIF/TIFF metadata using libexif
1169+
Exif exif;
1170+
json exif_json;
1171+
vector<uint8_t> exif_blob = extract_tiff_exif_blob(data, reverse_endian);
1172+
if (!exif_blob.empty())
10521173
{
1053-
spdlog::warn("Exception while parsing EXIF data: {}", e.what());
1174+
spdlog::debug("Found EXIF data of size {} bytes", exif_blob.size());
1175+
1176+
try
1177+
{
1178+
exif = Exif{exif_blob};
1179+
exif_json = exif.to_json();
1180+
if (!exif_json.empty())
1181+
spdlog::debug("TIFF/EXIF metadata successfully parsed");
1182+
else
1183+
spdlog::debug("EXIF blob extracted but parsing returned empty result");
1184+
}
1185+
catch (const std::exception &e)
1186+
{
1187+
spdlog::warn("Exception while parsing EXIF data: {}", e.what());
1188+
exif.reset();
1189+
}
10541190
}
10551191

10561192
vector<ImagePtr> images;
@@ -1065,8 +1201,11 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10651201
{
10661202
image->filename = filename;
10671203
// Use pre-parsed EXIF data
1068-
if (!exif_json.empty())
1204+
if (exif.valid())
1205+
{
1206+
image->exif = exif;
10691207
image->metadata["exif"] = exif_json;
1208+
}
10701209
images.push_back(image);
10711210
}
10721211

@@ -1075,8 +1214,11 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10751214
{
10761215
sub_image->filename = filename;
10771216
// Use pre-parsed EXIF data
1078-
if (!exif_json.empty())
1217+
if (exif.valid())
1218+
{
1219+
sub_image->exif = exif;
10791220
sub_image->metadata["exif"] = exif_json;
1221+
}
10801222
images.push_back(sub_image);
10811223
}
10821224

0 commit comments

Comments
 (0)