@@ -150,6 +150,72 @@ class C2paSigningAlg(enum.IntEnum):
150150 ctypes .c_ubyte ), ctypes .c_size_t )
151151
152152
# Signatures that sit at offset 0 of the file, mapped to (extension, MIME type).
_LEADING_SIGNATURES = (
    ((b'%PDF',), ('pdf', 'application/pdf')),
    ((b'\x89PNG\r\n\x1a\n',), ('png', 'image/png')),
    ((b'\xff\xd8\xff',), ('jpg', 'image/jpeg')),
    ((b'GIF87a', b'GIF89a'), ('gif', 'image/gif')),
    # DNG shares these TIFF signatures and is therefore reported as TIFF.
    ((b'II*\x00', b'MM\x00*'), ('tiff', 'image/tiff')),
    # ID3v2 tag, or an MPEG audio Layer III frame sync
    # (MPEG-1 / MPEG-2, with or without CRC protection).
    ((b'ID3', b'\xff\xfb', b'\xff\xf3', b'\xff\xfa', b'\xff\xf2'),
     ('mp3', 'audio/mpeg')),
)

# RIFF containers carry their form type in bytes 8-12.
_RIFF_FORM_TYPES = {
    b'WEBP': ('webp', 'image/webp'),
    b'WAVE': ('wav', 'audio/wav'),
    b'AVI ': ('avi', 'video/x-msvideo'),
}

# ISO BMFF major brands (bytes 8-12 of the leading ``ftyp`` box).
_ISO_BMFF_BRANDS = {
    b'avif': ('avif', 'image/avif'),
    b'avis': ('avif', 'image/avif'),
    b'heic': ('heic', 'image/heic'),
    b'heix': ('heic', 'image/heic'),
    b'mif1': ('heif', 'image/heif'),
    b'qt  ': ('mov', 'video/quicktime'),
    b'mp41': ('mp4', 'video/mp4'),
    b'mp42': ('mp4', 'video/mp4'),
    b'isom': ('mp4', 'video/mp4'),
    b'iso2': ('mp4', 'video/mp4'),
    b'avc1': ('mp4', 'video/mp4'),
    b'dash': ('mp4', 'video/mp4'),
    b'M4V ': ('mp4', 'video/mp4'),
}


def _classify_iso_bmff(header: bytes) -> Optional[tuple[str, str]]:
    """Classify an ISO BMFF file (MP4, MOV, M4A, AVIF, HEIC, HEIF) by its ftyp box."""
    # The box size in bytes 0-4 varies with the number of compatible brands,
    # so only the box type and the brand fields are inspected.
    if header[4:8] != b'ftyp':
        return None

    major_brand = header[8:12]
    if major_brand[:3] == b'M4A':
        return ('m4a', 'audio/mp4')

    if major_brand == b'mif1':
        # 'mif1' is the generic HEIF image brand; the codec-specific brand,
        # when present, is listed among the compatible brands that follow
        # the 4-byte minor version.
        box_end = min(int.from_bytes(header[:4], 'big'), len(header))
        compatible = {header[i:i + 4] for i in range(16, box_end - 3, 4)}
        for brand in (b'avif', b'heic', b'heix'):
            if brand in compatible:
                return _ISO_BMFF_BRANDS[brand]

    return _ISO_BMFF_BRANDS.get(major_brand)


def _guess_mime_type_using_magic_number(file_path: Union[str, Path]) -> Optional[tuple[str, str]]:
    """Guess a file's type from the signature bytes at the start of the file.

    Only the first 1024 bytes are read. Currently supports:
    - SVG files (start with <?xml ... <svg, or with <svg)
    - Image formats: PNG, JPEG, GIF, TIFF, WebP, AVIF, HEIC, HEIF
      (DNG uses the TIFF signature and is reported as TIFF)
    - Video formats: MP4, MOV, AVI
    - Audio formats: MP3, M4A, WAV
    - Document formats: PDF

    Args:
        file_path: Path to the file to check

    Returns:
        Tuple of (extension, mime_type) if detected, None if the format is
        not recognised or the file cannot be read.
    """
    # Detection is best-effort: an unreadable or invalid path means "unknown".
    try:
        with open(file_path, 'rb') as f:
            header = f.read(1024)
    except (OSError, ValueError, TypeError):
        return None

    # Binary signatures are checked before the text-based one: the lossy
    # decode used for SVG drops invalid bytes, so a binary header followed by
    # '<svg' could otherwise be misreported as SVG.
    for prefixes, detected in _LEADING_SIGNATURES:
        if header.startswith(prefixes):
            return detected

    if header.startswith(b'RIFF'):
        return _RIFF_FORM_TYPES.get(header[8:12])

    detected = _classify_iso_bmff(header)
    if detected is not None:
        return detected

    # Text-based detection: 'utf-8-sig' drops a leading byte-order mark, and
    # undecodable bytes (e.g. a character cut at the 1024-byte limit) are ignored.
    header_str = header.decode('utf-8-sig', errors='ignore').strip()
    if header_str.startswith('<svg'):
        return ('svg', 'image/svg+xml')
    if header_str.startswith('<?xml') and '<svg' in header_str:
        return ('svg', 'image/svg+xml')

    return None
217+
218+
153219class StreamContext (ctypes .Structure ):
154220 """Opaque structure for stream context."""
155221 _fields_ = [] # Empty as it's opaque in the C API
0 commit comments