Skip to content

Commit 9f3d69b

Browse files
committed
fix: Initial file guesses
1 parent 839a570 commit 9f3d69b

File tree

16 files changed

+183
-1
lines changed

16 files changed

+183
-1
lines changed

src/c2pa/c2pa.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,72 @@ class C2paSigningAlg(enum.IntEnum):
150150
ctypes.c_ubyte), ctypes.c_size_t)
151151

152152

153+
def _guess_mime_type_using_magic_number(file_path: Union[str, Path]) -> Optional[tuple[str, str]]:
154+
"""Guess MIME type by reading file header bytes.
155+
Currently supports:
156+
- SVG files (start with <?xml or <svg)
157+
- Image formats: PNG, JPEG, GIF, TIFF, WebP, AVIF, HEIC, HEIF, DNG
158+
- Video formats: MP4, MOV, AVI
159+
- Audio formats: MP3, M4A, WAV
160+
- Document formats: PDF
161+
Args:
162+
file_path: Path to the file to check
163+
Returns:
164+
Tuple of (extension, mime_type) if detected, None otherwise
165+
"""
166+
try:
167+
with open(file_path, 'rb') as f:
168+
# Read first 1024 bytes to check for file signatures
169+
header = f.read(1024)
170+
171+
# Convert to string for easier pattern matching (for text-based formats)
172+
header_str = header.decode('utf-8', errors='ignore').strip()
173+
174+
# Check for SVG signatures
175+
if header_str.startswith('<?xml') and '<svg' in header_str:
176+
return ('svg', 'image/svg+xml')
177+
elif header_str.startswith('<svg'):
178+
return ('svg', 'image/svg+xml')
179+
180+
# Check for PDF signature
181+
if header.startswith(b'%PDF'):
182+
return ('pdf', 'application/pdf')
183+
184+
# Check for image formats
185+
if header.startswith(b'\x89PNG\r\n\x1a\n'):
186+
return ('png', 'image/png')
187+
elif header.startswith(b'\xff\xd8\xff'):
188+
return ('jpg', 'image/jpeg')
189+
elif header.startswith(b'GIF87a') or header.startswith(b'GIF89a'):
190+
return ('gif', 'image/gif')
191+
elif header.startswith(b'II*\x00') or header.startswith(b'MM\x00*'):
192+
return ('tiff', 'image/tiff')
193+
elif header.startswith(b'RIFF') and header[8:12] == b'WEBP':
194+
return ('webp', 'image/webp')
195+
elif header.startswith(b'\x00\x00\x00\x20ftypavif'):
196+
return ('avif', 'image/avif')
197+
198+
# Check for audio formats
199+
elif header.startswith(b'ID3') or header.startswith(b'\xff\xfb') or header.startswith(b'\xff\xf3'):
200+
return ('mp3', 'audio/mpeg')
201+
elif header.startswith(b'\x00\x00\x00\x20ftypM4A') or header.startswith(b'\x00\x00\x00\x1cftypM4A'):
202+
return ('m4a', 'audio/mp4')
203+
elif header.startswith(b'RIFF') and header[8:12] == b'WAVE':
204+
return ('wav', 'audio/wav')
205+
206+
# Check for video formats
207+
# MP4: look for 'ftyp' at offset 4 and major brand in common MP4 video brands
208+
# Generally catches MP4-based formats that were not caught above
209+
elif header[4:8] == b'ftyp' and header[8:12] in {b'mp41', b'mp42', b'isom', b'iso2', b'avc1', b'dash', b'M4V '}:
210+
return ('mp4', 'video/mp4')
211+
elif header.startswith(b'RIFF') and header[8:12] == b'AVI ':
212+
return ('avi', 'video/x-msvideo')
213+
214+
return None
215+
except Exception:
216+
return None
217+
218+
153219
class StreamContext(ctypes.Structure):
154220
"""Opaque structure for stream context."""
155221
_fields_ = [] # Empty as it's opaque in the C API
725 KB
Binary file not shown.
95.2 KB
Binary file not shown.
723 KB
Binary file not shown.
135 KB
Binary file not shown.
3.84 MB
Binary file not shown.
1.86 MB
Binary file not shown.
809 KB
Binary file not shown.
9.69 KB
Binary file not shown.
292 KB
Binary file not shown.

0 commit comments

Comments
 (0)