@@ -948,6 +948,131 @@ vector<ImagePtr> load_image(TIFF *tif, bool reverse_endian, tdir_t dir, int sub_
948948 return images;
949949}
950950
951+ // / Extract EXIF data from TIFF file as a blob suitable for libexif parsing.
952+ // / Returns a vector with "Exif\0\0" header followed by minimal TIFF structure.
953+ // / Returns empty vector if extraction fails or is not possible.
954+ vector<uint8_t > extract_tiff_exif_blob (const vector<uint8_t > &data, bool reverse_endian)
955+ {
956+ if (data.size () < 8 )
957+ return {};
958+
959+ try
960+ {
961+ // Determine TIFF endianness
962+ Endian tiff_endian = reverse_endian ? (is_little_endian () ? Endian::Big : Endian::Little)
963+ : (is_little_endian () ? Endian::Little : Endian::Big);
964+ uint32_t first_ifd_offset = read_as<uint32_t >(&data[4 ], tiff_endian);
965+
966+ spdlog::debug (" First IFD offset: {} (0x{:08X})" , first_ifd_offset, first_ifd_offset);
967+
968+ if (first_ifd_offset <= 8 || first_ifd_offset >= data.size ())
969+ return {};
970+
971+ // Construct a minimal TIFF blob: header + IFD + trailing data
972+ // We include extra data after the IFD to capture offset-referenced values
973+ const size_t ifd_buffer_size = 256 * 1024 ; // 256KB should cover most metadata
974+ size_t blob_data_size = std::min (ifd_buffer_size, data.size () - first_ifd_offset);
975+ size_t blob_size = 8 + blob_data_size; // header + IFD data
976+
977+ vector<uint8_t > tiff_blob (blob_size);
978+
979+ // Copy TIFF header (8 bytes)
980+ memcpy (tiff_blob.data (), data.data (), 8 );
981+
982+ // Modify the IFD offset to point right after the header (offset 8)
983+ write_as<uint32_t >(tiff_blob.data () + 4 , 8 , tiff_endian);
984+
985+ // Copy IFD and trailing data
986+ memcpy (tiff_blob.data () + 8 , data.data () + first_ifd_offset, blob_data_size);
987+
988+ // Now we need to adjust any offsets in the IFD entries
989+ // IFD structure: 2-byte count, then 12-byte entries
990+ uint16_t num_entries = read_as<uint16_t >(tiff_blob.data () + 8 , tiff_endian);
991+ spdlog::debug (" IFD has {} entries" , num_entries);
992+
993+ // Adjust offsets in IFD entries (subtract the old IFD offset, add 8)
994+ int32_t offset_delta = 8 - (int32_t )first_ifd_offset;
995+
996+ for (uint16_t i = 0 ; i < num_entries && i < 1000 ; ++i) // sanity limit
997+ {
998+ size_t entry_offset = 8 + 2 + (i * 12 ); // header + count + entry
999+ uint16_t tag = read_as<uint16_t >(tiff_blob.data () + entry_offset + 0 , tiff_endian);
1000+ uint16_t type = read_as<uint16_t >(tiff_blob.data () + entry_offset + 2 , tiff_endian);
1001+ uint32_t count = read_as<uint32_t >(tiff_blob.data () + entry_offset + 4 , tiff_endian);
1002+ uint32_t value_offset = read_as<uint32_t >(tiff_blob.data () + entry_offset + 8 , tiff_endian);
1003+
1004+ // Determine if this is an offset or inline value
1005+ // Values > 4 bytes are stored as offsets
1006+ size_t type_size = 0 ;
1007+ switch (type)
1008+ {
1009+ case 1 : type_size = 1 ; break ; // BYTE
1010+ case 2 : type_size = 1 ; break ; // ASCII
1011+ case 3 : type_size = 2 ; break ; // SHORT
1012+ case 4 : type_size = 4 ; break ; // LONG
1013+ case 5 : type_size = 8 ; break ; // RATIONAL
1014+ case 6 : type_size = 1 ; break ; // SBYTE
1015+ case 7 : type_size = 1 ; break ; // UNDEFINED
1016+ case 8 : type_size = 2 ; break ; // SSHORT
1017+ case 9 : type_size = 4 ; break ; // SLONG
1018+ case 10 : type_size = 8 ; break ; // SRATIONAL
1019+ case 11 : type_size = 4 ; break ; // FLOAT
1020+ case 12 : type_size = 8 ; break ; // DOUBLE
1021+ default : type_size = 1 ; break ;
1022+ }
1023+
1024+ size_t value_size = type_size * count;
1025+
1026+ // If value doesn't fit inline (> 4 bytes), it's an offset that needs adjusting
1027+ if (value_size > 4 && value_offset >= first_ifd_offset && value_offset < first_ifd_offset + blob_data_size)
1028+ {
1029+ uint32_t new_offset = value_offset + offset_delta;
1030+ write_as<uint32_t >(tiff_blob.data () + entry_offset + 8 , new_offset, tiff_endian);
1031+ spdlog::debug (" Adjusted tag 0x{:04X} offset from {} to {}" , tag, value_offset, new_offset);
1032+ }
1033+ }
1034+
1035+ // Adjust the "next IFD" offset at the end of the IFD
1036+ size_t next_ifd_offset_pos = 8 + 2 + (num_entries * 12 );
1037+ if (next_ifd_offset_pos + 4 <= tiff_blob.size ())
1038+ {
1039+ uint32_t next_ifd = read_as<uint32_t >(tiff_blob.data () + next_ifd_offset_pos, tiff_endian);
1040+ if (next_ifd > 0 && next_ifd >= first_ifd_offset && next_ifd < first_ifd_offset + blob_data_size)
1041+ {
1042+ write_as<uint32_t >(tiff_blob.data () + next_ifd_offset_pos, next_ifd + offset_delta, tiff_endian);
1043+ spdlog::debug (" Adjusted next IFD offset from {} to {}" , next_ifd, next_ifd + offset_delta);
1044+ }
1045+ else if (next_ifd > 0 )
1046+ {
1047+ // Next IFD is outside our buffer, set to 0 (no next IFD)
1048+ write_as<uint32_t >(tiff_blob.data () + next_ifd_offset_pos, 0 , tiff_endian);
1049+ spdlog::debug (" Set next IFD offset to 0 (was {})" , next_ifd);
1050+ }
1051+ }
1052+
1053+ // Prepend "Exif\0\0" header if not already present
1054+ if (tiff_blob.size () < 6 || memcmp (tiff_blob.data (), " Exif\0\0 " , 6 ) != 0 )
1055+ {
1056+ vector<uint8_t > exif_blob (6 + tiff_blob.size ());
1057+ exif_blob[0 ] = ' E' ;
1058+ exif_blob[1 ] = ' x' ;
1059+ exif_blob[2 ] = ' i' ;
1060+ exif_blob[3 ] = ' f' ;
1061+ exif_blob[4 ] = 0 ;
1062+ exif_blob[5 ] = 0 ;
1063+ memcpy (exif_blob.data () + 6 , tiff_blob.data (), tiff_blob.size ());
1064+ return exif_blob;
1065+ }
1066+
1067+ return tiff_blob;
1068+ }
1069+ catch (const std::exception &e)
1070+ {
1071+ spdlog::debug (" Failed to extract EXIF blob: {}" , e.what ());
1072+ return {};
1073+ }
1074+ }
1075+
9511076vector<ImagePtr> load_sub_images (TIFF *tif, bool reverse_endian, tdir_t dir, const ImageLoadOptions &opts)
9521077{
9531078 vector<ImagePtr> images;
@@ -1040,17 +1165,28 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10401165
10411166 auto tif_guard = ScopeGuard{[tif] { TIFFClose (tif); }};
10421167
1043- // Extract EXIF data once for all images/sub-images
1044- json exif_json;
1045- try
1046- {
1047- exif_json = exif_to_json (data.data (), data.size ());
1048- if (!exif_json.empty ())
1049- spdlog::debug (" EXIF metadata successfully parsed" );
1050- }
1051- catch (const std::exception &e)
1168+ // Extract EXIF/TIFF metadata using libexif
1169+ Exif exif;
1170+ json exif_json;
1171+ vector<uint8_t > exif_blob = extract_tiff_exif_blob (data, reverse_endian);
1172+ if (!exif_blob.empty ())
10521173 {
1053- spdlog::warn (" Exception while parsing EXIF data: {}" , e.what ());
1174+ spdlog::debug (" Found EXIF data of size {} bytes" , exif_blob.size ());
1175+
1176+ try
1177+ {
1178+ exif = Exif{exif_blob};
1179+ exif_json = exif.to_json ();
1180+ if (!exif_json.empty ())
1181+ spdlog::debug (" TIFF/EXIF metadata successfully parsed" );
1182+ else
1183+ spdlog::debug (" EXIF blob extracted but parsing returned empty result" );
1184+ }
1185+ catch (const std::exception &e)
1186+ {
1187+ spdlog::warn (" Exception while parsing EXIF data: {}" , e.what ());
1188+ exif.reset ();
1189+ }
10541190 }
10551191
10561192 vector<ImagePtr> images;
@@ -1065,8 +1201,11 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10651201 {
10661202 image->filename = filename;
10671203 // Use pre-parsed EXIF data
1068- if (!exif_json.empty ())
1204+ if (exif.valid ())
1205+ {
1206+ image->exif = exif;
10691207 image->metadata [" exif" ] = exif_json;
1208+ }
10701209 images.push_back (image);
10711210 }
10721211
@@ -1075,8 +1214,11 @@ vector<ImagePtr> load_tiff_image(istream &is, string_view filename, const ImageL
10751214 {
10761215 sub_image->filename = filename;
10771216 // Use pre-parsed EXIF data
1078- if (!exif_json.empty ())
1217+ if (exif.valid ())
1218+ {
1219+ sub_image->exif = exif;
10791220 sub_image->metadata [" exif" ] = exif_json;
1221+ }
10801222 images.push_back (sub_image);
10811223 }
10821224
0 commit comments