@@ -92,7 +92,14 @@ extension ImageDecoders {
9292 guard let endOfScan = scanner. scan ( data) , endOfScan > 0 else {
9393 return nil
9494 }
95- guard let image = ImageDecoders . Default. _decode ( data [ 0 ... endOfScan] , scale: scale) else {
95+
96+ // To decode data correctly, binary needs to end with an EOI (End Of Image) marker (0xFFD9)
97+ var imageData = data [ 0 ... endOfScan]
98+ if data [ endOfScan - 1 ] != 0xFF || data [ endOfScan] != 0xD9 {
99+ imageData += [ 0xFF , 0xD9 ]
100+ }
101+ // We could be appending the data to `CGImageSourceCreateIncremental` and producing `CGImage`s from there but the EOI addition forces us to have to finalize everytime, which counters any performance gains.
102+ guard let image = ImageDecoders . Default. _decode ( imageData, scale: scale) else {
96103 return nil
97104 }
98105 return ImageContainer ( image: image, type: assetType, isPreview: true , userInfo: [ . scanNumberKey: numberOfScans] )
@@ -128,22 +135,37 @@ private struct ProgressiveJPEGScanner: Sendable {
128135 /// Scans the given data. If finds new scans, returns the last index of the
129136 /// last available scan.
130137 mutating func scan( _ data: Data ) -> Int ? {
138+ if scannedIndex < 0 {
139+ guard let header = ImageProperties . JPEG ( data) ,
140+ header. isProgressive else {
141+ return nil
142+ }
143+
144+ // we always want to start after the Start-Of-Frame marker to skip over any thumbnail markers which could interfere with the parsing
145+ scannedIndex = header. startOfFrameOffset + 2
146+ }
147+
131148 // Check if there is more data to scan.
132149 guard ( scannedIndex + 1 ) < data. count else {
133150 return nil
134151 }
135152
136153 // Start scanning from the where it left off previous time.
137- var index = ( scannedIndex + 1 )
154+ // 1. we use `Data.firstIndex` as it's faster than iterating byte-by-byte in Swift
155+ // 2. we could use `.lastIndex` and be much faster but we want to keep track of scan number
138156 var numberOfScans = self . numberOfScans
139- while index < ( data . count - 1 ) {
140- scannedIndex = index
141- // 0xFF, 0xDA - Start Of Scan
142- if data [ index ] == 0xFF , data [ index + 1 ] == 0xDA {
143- lastStartOfScan = index
157+ var searchRange = ( scannedIndex + 1 ) ..< data . count
158+ // 0xFF, 0xDA - Start Of Scan
159+ while let nextMarker = data [ searchRange ] . firstIndex ( of : 0xFF ) ,
160+ nextMarker < data. count - 1 {
161+ if data [ nextMarker + 1 ] == 0xDA {
144162 numberOfScans += 1
163+ lastStartOfScan = nextMarker
164+ scannedIndex = nextMarker + 1
165+ } else {
166+ scannedIndex = nextMarker
145167 }
146- index += 1
168+ searchRange = ( scannedIndex + 1 ) ..< data . count
147169 }
148170
149171 // Found more scans than the previous time
@@ -175,42 +197,126 @@ extension ImageDecoders.Default {
175197
/// Caseless enum used purely as a namespace; format-specific metadata types
/// (such as `JPEG`) are added to it through extensions.
176198enum ImageProperties { }
177199
200+
178201// Keeping this private for now; not sure about either the API or the implementation.
179202extension ImageProperties {
/// Minimal JPEG header information obtained by walking the marker segments
/// at the beginning of the data.
///
/// All byte accesses use absolute indices starting at `0`, so `data` is
/// expected to be zero-based (i.e. not a slice of a larger `Data`).
struct JPEG {
    /// `true` when the frame header found is SOF2 (`0xFF 0xC2`, progressive DCT),
    /// `false` when it is SOF0 (`0xFF 0xC0`, baseline DCT).
    var isProgressive: Bool

    /// Index of the `0xFF` byte immediately preceding the Start-Of-Frame marker code.
    var startOfFrameOffset: Int

    /// Parses the JPEG header in `data`.
    ///
    /// Returns `nil` when `data` does not start with an SOI marker or when no
    /// SOF0/SOF2 marker could be reached (e.g. the data is still incomplete).
    init?(_ data: Data) {
        guard let header = Self.parseHeader(data) else {
            return nil
        }
        self = header
    }

    private init(isProgressive: Bool, startOfFrameOffset: Int) {
        self.isProgressive = isProgressive
        self.startOfFrameOffset = startOfFrameOffset
    }

    // This is the most accurate way to determine whether this is a progressive JPEG,
    // but sometimes can come back nil for baseline JPEGs.
    private static func isProgressive_io(_ data: Data) -> Bool? {
        guard let imageSource = CGImageSourceCreateWithData(data as CFData, nil),
              CGImageSourceGetCount(imageSource) > 0 else {
            return nil
        }

        // Get the properties for the first image
        let properties = CGImageSourceCopyPropertiesAtIndex(imageSource, 0, nil) as? [CFString: Any]
        let jfifProperties = properties?[kCGImagePropertyJFIFDictionary] as? [CFString: Any]

        // This property might be missing for baseline JPEGs so we can't depend on it completely.
        if let isProgressive = jfifProperties?[kCGImagePropertyJFIFIsProgressive] as? Bool {
            return isProgressive
        }

        return nil
    }

    /// Manually walks through the JPEG marker segments until a Start-Of-Frame
    /// marker is found.
    ///
    /// - Parameter data: The (possibly partial) JPEG data, starting at the SOI marker.
    /// - Returns: The header describing the first SOF0/SOF2 marker found, or
    ///   `nil` if the data is not a JPEG, is corrupted, reaches SOS/EOI without
    ///   such a marker, or does not yet contain enough bytes.
    static func parseHeader(_ data: Data) -> JPEG? {
        // JPEG starts with SOI marker (FF D8)
        guard data.count >= 2, data[0] == 0xFF, data[1] == 0xD8 else {
            return nil
        }

        // Start after SOI marker
        var searchRange = 2..<data.count

        // Process all segments until we find an SOF marker or reach the end
        while let nextMarker = data[searchRange].firstIndex(of: 0xFF),
              nextMarker < data.count - 1 {

            // Skip padding (fill bytes): any number of 0xFF may precede the marker code.
            var controlIndex = nextMarker + 1
            while controlIndex < data.count, data[controlIndex] == 0xFF {
                controlIndex += 1
            }

            // The data ended inside the padding, so there is no marker code to
            // read yet. Bail out instead of indexing past the end.
            guard controlIndex < data.count else {
                return nil
            }

            // The byte coming after 0xFF gives us the information
            let marker = data[controlIndex]

            // Next iteration we look for the next 0xFF byte after this one
            // (overridden below for markers that carry a length field).
            searchRange = (controlIndex + 1)..<data.count

            // 0xFF, 0xC0 - Start Of Frame (baseline DCT)
            // 0xFF, 0xC2 - Start Of Frame (progressive DCT)
            // https://en.wikipedia.org/wiki/JPEG
            // WARNING: These markers may also appear as part of a thumbnail in
            // an exif segment, which is why segments with a length field are
            // skipped as a whole rather than scanned byte-by-byte.
            switch marker {
            case 0xC0:
                return JPEG(isProgressive: false, startOfFrameOffset: controlIndex - 1)
            case 0xC2:
                return JPEG(isProgressive: true, startOfFrameOffset: controlIndex - 1)
            case 0xD0...0xD7, 0x01:
                // Markers without length fields (RST0-RST7, TEM) have no data segment
                continue
            case 0xD9:
                // EOI (End of Image)
                return nil
            case 0xDA:
                // SOS (Start of Scan) - if we've reached this place we've missed the SOF marker
                return nil
            default:
                break
            }

            // All other markers have a length field, make sure we have enough bytes for the length
            let lengthIndex = controlIndex + 1
            guard lengthIndex < data.count - 1 else {
                return nil
            }

            // Read the big-endian length (includes the length bytes themselves)
            let length = UInt16(data[lengthIndex]) << 8 | UInt16(data[lengthIndex + 1])

            // The length counts its own 2 bytes, so 2 (an empty payload) is the
            // smallest valid value; anything less means a corrupted JPEG.
            guard length >= 2 else {
                return nil
            }

            let frontier = lengthIndex + Int(length)
            guard frontier < data.count else {
                // We don't have enough data to reach the end of this segment
                return nil
            }

            // Skip this segment entirely
            searchRange = frontier..<data.count
        }

        // If we reached this part we haven't found SOF marker, likely data is not complete
        return nil
    }
}
0 commit comments