Skip to content
Open
Show file tree
Hide file tree
Changes from 58 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
f2a8951
fix: create signer tests
tmathern Jun 20, 2025
32a14b8
fix: Test repro
tmathern Jun 20, 2025
07729ac
fix: Clean up
tmathern Jun 20, 2025
a60d376
fix: Plenty of debug logs
tmathern Jun 21, 2025
8e791d1
fix: Remove debug logs
tmathern Jun 21, 2025
97374d3
fix: Clean up
tmathern Jun 21, 2025
9b41397
fix: FOrmat
tmathern Jun 21, 2025
4e81626
fix: Clean up
tmathern Jun 21, 2025
547cd74
fix: Change return
tmathern Jun 21, 2025
fced710
fix: Change sign_file signature again
tmathern Jun 21, 2025
f693f55
fix: Improve pointer handling and refactor
tmathern Jun 21, 2025
53f0574
fix: Refactor
tmathern Jun 21, 2025
500eb60
fix: Refactor
tmathern Jun 21, 2025
496ee20
fix: Refactor
tmathern Jun 21, 2025
e262237
fix: Refactor 3
tmathern Jun 21, 2025
0db06c8
fix: Refactor once more
tmathern Jun 21, 2025
3ca91c6
fix: Refactor once more with overload
tmathern Jun 21, 2025
be49781
fix: Refactor once more with overload
tmathern Jun 21, 2025
8b5c6d4
fix: Format
tmathern Jun 21, 2025
22860df
fix: Change logic
tmathern Jun 21, 2025
a97a28f
fix: Deprecation
tmathern Jun 21, 2025
532825e
fix: Logic
tmathern Jun 21, 2025
e9b9f7a
fix: Test stdout output
tmathern Jun 21, 2025
9775407
fix: One last format
tmathern Jun 21, 2025
2ca2298
fix: Refactor
tmathern Jun 24, 2025
12f04d4
fix: Refactor 2
tmathern Jun 24, 2025
b2a47f4
fix: Refactor
tmathern Jun 25, 2025
7b075af
fix: Better API
tmathern Jun 25, 2025
8df82ba
fix: Verify error gets raised
tmathern Jun 25, 2025
6c865bd
fix: Verify error gets raised 2
tmathern Jun 25, 2025
cd13130
fix: Add context manager test for callback signer
tmathern Jun 25, 2025
4d50c05
fix: Verify used alg in tests
tmathern Jun 25, 2025
40a05f2
fix: More tests
tmathern Jun 25, 2025
034c779
fix: Verify signer can be used multiple times
tmathern Jun 25, 2025
b551e69
fix: Be more friendly with input
tmathern Jun 25, 2025
2835bb3
fix: Throw in stream optimization
tmathern Jun 25, 2025
b20c9db
fix: Faster memory tricks
tmathern Jun 25, 2025
7b9698f
fix: Memory handling change
tmathern Jun 25, 2025
aa701a7
fix: Docs
tmathern Jun 25, 2025
5cb1a71
fix: Docs
tmathern Jun 25, 2025
876d32a
fix: Refactor
tmathern Jun 25, 2025
e9828ea
fix: Import changes, error handling made consistent, formatting (#126)
tmathern Jun 25, 2025
c399d79
fix: Return sign values
tmathern Jun 25, 2025
839a570
fix: Return sign values
tmathern Jun 25, 2025
9f3d69b
fix: Initial file guesses
tmathern Jun 25, 2025
8b7aaa6
fix: One more test
tmathern Jun 25, 2025
2d46b89
fix: Add test
tmathern Jun 25, 2025
dc2df9a
fix: Update code for mov
tmathern Jun 25, 2025
82a3ee9
fix: Typo
tmathern Jun 25, 2025
380246e
fix: Update tests
tmathern Jun 25, 2025
53b6ff8
fix: Add docs on test files
tmathern Jun 26, 2025
376a562
fix: Prepare version number bump
tmathern Jun 26, 2025
a8b5904
fix: Change API build.sign* to return manifest bytes
tmathern Jun 26, 2025
6e9cd77
ci: Merge branch 'mathern/sign-file' into mathern/add-extension-guessing
tmathern Jun 26, 2025
31dbe02
fix: Follow-up update
tmathern Jun 26, 2025
3143355
ix: Merge in main
tmathern Jun 26, 2025
1b2e009
fix: Bump version number
tmathern Jun 26, 2025
0317922
fix: Bump version number
tmathern Jun 26, 2025
e4aa034
fix: Update docs
tmathern Jun 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "c2pa-python"
version = "0.12.0"
version = "0.12.1"
requires-python = ">=3.10"
description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library"
readme = { file = "README.md", content-type = "text/markdown" }
Expand Down
95 changes: 88 additions & 7 deletions src/c2pa/c2pa.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,75 @@ class C2paSigningAlg(enum.IntEnum):
ctypes.c_ubyte), ctypes.c_size_t)


def _guess_mime_type_using_magic_number(
        file_path: Union[str, Path]) -> Optional[tuple[str, str]]:
    """Guess a file's type from the magic number in its leading bytes.

    This is a best-effort fallback for files whose name carries no usable
    extension. Only a limited set of formats is recognized; callers should
    prefer paths with a proper extension so no guessing is needed.

    Args:
        file_path: Path to the file to inspect.

    Returns:
        Tuple of (extension, mime_type) if a known signature is found,
        None if the file cannot be read or no signature matches.
    """
    # Only the file access is guarded: the matching logic below cannot
    # raise, so a wider try block would merely hide programming errors.
    try:
        with open(file_path, 'rb') as f:
            # The first 1024 bytes are enough for every signature below
            header = f.read(1024)
    except (OSError, ValueError, TypeError):
        # Unreadable/missing file or unusable path: nothing to guess from
        return None

    # Text view of the header, used for the XML-based SVG check only.
    # 'utf-8-sig' drops a leading byte-order mark, which str.strip() would
    # leave in place and which would otherwise defeat startswith().
    header_str = header.decode('utf-8-sig', errors='ignore').strip()
    if header_str.startswith('<svg') or (
            header_str.startswith('<?xml') and '<svg' in header_str):
        return ('svg', 'image/svg+xml')

    # Fixed signatures located at the very start of the file
    if header.startswith(b'%PDF'):
        return ('pdf', 'application/pdf')
    if header.startswith(b'\x89PNG\r\n\x1a\n'):
        return ('png', 'image/png')
    if header.startswith(b'\xff\xd8\xff'):
        return ('jpg', 'image/jpeg')
    if header.startswith((b'GIF87a', b'GIF89a')):
        return ('gif', 'image/gif')
    if header.startswith((b'II*\x00', b'MM\x00*')):
        return ('tiff', 'image/tiff')

    # RIFF containers carry their form type at offset 8
    if header.startswith(b'RIFF'):
        riff_form = header[8:12]
        if riff_form == b'WEBP':
            return ('webp', 'image/webp')
        if riff_form == b'WAVE':
            return ('wav', 'audio/wav')
        if riff_form == b'AVI ':
            return ('avi', 'video/x-msvideo')

    # MP3: ID3 tag or an MPEG audio frame sync
    if header.startswith((b'ID3', b'\xff\xfb', b'\xff\xf3')):
        return ('mp3', 'audio/mpeg')

    # ISO base media file format: 'ftyp' at offset 4, followed by the
    # 4-byte major brand. The first four bytes are the box size, which
    # depends on how many compatible brands are listed, so it is
    # deliberately not part of the match.
    if header[4:8] == b'ftyp':
        major_brand = header[8:12]
        if major_brand == b'avif':
            return ('avif', 'image/avif')
        if major_brand in (b'mif1', b'msf1', b'heic', b'heix'):
            return ('heic', 'image/heic')
        if major_brand.startswith(b'M4A'):
            return ('m4a', 'audio/mp4')
        if major_brand in (b'mp41', b'mp42', b'isom', b'iso2',
                           b'avc1', b'dash', b'M4V '):
            return ('mp4', 'video/mp4')
        # QuickTime's brand is 'qt' padded with two spaces to four bytes
        if major_brand == b'qt  ':
            return ('mov', 'video/quicktime')

    return None


class StreamContext(ctypes.Structure):
"""Opaque structure for stream context."""
_fields_ = [] # Empty as it's opaque in the C API
Expand Down Expand Up @@ -676,8 +745,14 @@ def sign_file(
# Get the MIME type from the file extension
mime_type = mimetypes.guess_type(str(source_path))[0]
if not mime_type:
raise C2paError.NotSupported(
f"Could not determine MIME type for file: {source_path}")
# If the file is extensionless, we may not be able to properly guess
# So we attempt one more guessing round here
other_mimetype_guess = _guess_mime_type_using_magic_number(source_path)
if other_mimetype_guess:
mime_type, _ = other_mimetype_guess
else:
raise C2paError.NotSupported(
f"Could not determine MIME type for file: {source_path}")

if return_manifest_as_bytes:
# Convert Python streams to Stream objects for internal signing
Expand Down Expand Up @@ -1039,7 +1114,7 @@ def __init__(self,
"""Create a new Reader.

Args:
format_or_path: The format or path to read from
format_or_path: The format (eg. file extension) or path to read from
stream: Optional stream to read from (any Python stream-like object)
manifest_data: Optional manifest data in bytes

Expand Down Expand Up @@ -1847,7 +1922,7 @@ def _sign_internal(

Args:
signer: The signer to use
format: The MIME type or extension of the content
format: The MIME type or extension of the content (extension preferred)
source_stream: The source stream
dest_stream: The destination stream

Expand Down Expand Up @@ -1915,7 +1990,7 @@ def sign(
"""Sign the builder's content and write to a destination stream.

Args:
format: The MIME type or extension of the content
format: The MIME type or extension of the content (extension preferred)
source: The source stream (any Python stream-like object)
dest: The destination stream (any Python stream-like object)
signer: The signer to use
Expand Down Expand Up @@ -1952,8 +2027,14 @@ def sign_file(self,
# Get the MIME type from the file extension
mime_type = mimetypes.guess_type(str(source_path))[0]
if not mime_type:
raise C2paError.NotSupported(
f"Could not determine MIME type for file: {source_path}")
# If the file is extensionless, we may not be able to properly guess
# So we attempt one more guessing round here
other_mimetype_guess = _guess_mime_type_using_magic_number(source_path)
if other_mimetype_guess:
mime_type, _ = other_mimetype_guess
else:
raise C2paError.NotSupported(
f"Could not determine MIME type for file: {source_path}")

# Open source and destination files
with open(source_path, 'rb') as source_file, open(dest_path, 'wb') as dest_file:
Expand Down
Binary file added tests/fixtures/extensionless-files/avi
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/avif
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/gif
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/heic
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/jpg
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/m4a
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/mp3
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/mp4
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/pdf
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/png
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/fixtures/extensionless-files/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# About these test files

The test files in this folder are a subset of the files found in the [c2pa-rs repo's test fixtures](https://github.com/contentauth/c2pa-rs/tree/main/sdk/tests/fixtures), renamed to lose the file extension.
11 changes: 11 additions & 0 deletions tests/fixtures/extensionless-files/svg

Large diffs are not rendered by default.

Binary file added tests/fixtures/extensionless-files/tiff
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/wav
Binary file not shown.
Binary file added tests/fixtures/extensionless-files/webp
Binary file not shown.
191 changes: 190 additions & 1 deletion tests/test_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import shutil

from c2pa import Builder, C2paError as Error, Reader, C2paSigningAlg as SigningAlg, C2paSignerInfo, Signer, sdk_version
from c2pa.c2pa import Stream, read_ingredient_file, read_file, sign_file, load_settings, create_signer
from c2pa.c2pa import Stream, read_ingredient_file, read_file, sign_file, load_settings, create_signer, _guess_mime_type_using_magic_number

# Suppress deprecation warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
Expand Down Expand Up @@ -783,6 +783,150 @@ def test_sign_file(self):
# Clean up the temporary directory
shutil.rmtree(temp_dir)

def test_sign_extensionless_jpg_file(self):
    """Sign the extensionless JPEG fixture with Builder.sign_file.

    The source path has no extension; the test checks that signing still
    succeeds, returns non-empty manifest bytes, and that the signed output
    can be read back as JPEG without validation errors.
    """
    # The scratch directory is removed when the with-block exits
    with tempfile.TemporaryDirectory() as scratch_dir:
        unsigned_path = os.path.join(
            FIXTURES_DIR, "extensionless-files", "jpg")
        signed_path = os.path.join(scratch_dir, "signed_output.jpg")

        builder = Builder(self.manifestDefinition)
        signed_manifest = builder.sign_file(
            source_path=unsigned_path,
            dest_path=signed_path,
            signer=self.signer
        )

        # A signed asset must exist on disk
        self.assertTrue(os.path.exists(signed_path))

        # sign_file is expected to hand back the manifest as bytes
        self.assertIsInstance(signed_manifest, bytes)
        self.assertGreater(len(signed_manifest), 0)

        # Round-trip: the manifest embedded in the output must be readable
        with open(signed_path, "rb") as signed_file:
            reader = Reader("image/jpeg", signed_file)
            manifest_json = reader.json()
            self.assertIn("Python Test", manifest_json)
            self.assertNotIn("validation_status", manifest_json)

def test_sign_extensionless_svg_file(self):
    """Sign the extensionless SVG fixture with Builder.sign_file.

    Checks that signing a source path without an extension succeeds,
    returns non-empty manifest bytes, and that the signed output can be
    read back as SVG without validation errors.
    """
    # The scratch directory is removed when the with-block exits
    with tempfile.TemporaryDirectory() as scratch_dir:
        unsigned_path = os.path.join(
            FIXTURES_DIR, "extensionless-files", "svg")
        signed_path = os.path.join(scratch_dir, "signed_output.svg")

        builder = Builder(self.manifestDefinition)
        signed_manifest = builder.sign_file(
            source_path=unsigned_path,
            dest_path=signed_path,
            signer=self.signer
        )

        # A signed asset must exist on disk
        self.assertTrue(os.path.exists(signed_path))

        # sign_file is expected to hand back the manifest as bytes
        self.assertIsInstance(signed_manifest, bytes)
        self.assertGreater(len(signed_manifest), 0)

        # Round-trip: the manifest embedded in the output must be readable
        with open(signed_path, "rb") as signed_file:
            reader = Reader("image/svg+xml", signed_file)
            manifest_json = reader.json()
            self.assertIn("Python Test", manifest_json)
            self.assertNotIn("validation_status", manifest_json)

def test_builder_sign_extensionless_svg_file_mimetype(self):
    """Sign the extensionless SVG fixture via Builder.sign with a MIME type.

    The format is passed explicitly as "image/svg+xml" together with open
    source/destination streams; the signed output is then read back and
    checked for the expected manifest content.
    """
    # The scratch directory is removed when the with-block exits
    with tempfile.TemporaryDirectory() as scratch_dir:
        unsigned_path = os.path.join(
            FIXTURES_DIR, "extensionless-files", "svg")
        signed_path = os.path.join(scratch_dir, "signed_output.svg")

        builder = Builder(self.manifestDefinition)
        with open(unsigned_path, 'rb') as unsigned_stream, \
                open(signed_path, 'wb') as signed_stream:
            builder.sign(
                signer=self.signer,
                format="image/svg+xml",  # format given as a MIME type
                source=unsigned_stream,
                dest=signed_stream
            )

        # A signed asset must exist on disk
        self.assertTrue(os.path.exists(signed_path))

        # Round-trip: the manifest embedded in the output must be readable
        with open(signed_path, "rb") as signed_file:
            reader = Reader("image/svg+xml", signed_file)
            manifest_json = reader.json()
            self.assertIn("Python Test", manifest_json)
            self.assertNotIn("validation_status", manifest_json)

def test_builder_sign_extensionless_svg_file_ext(self):
    """Sign the extensionless SVG fixture via Builder.sign with an extension.

    The format is passed explicitly as the bare extension "svg" together
    with open source/destination streams; the signed output is then read
    back and checked for the expected manifest content.
    """
    # The scratch directory is removed when the with-block exits
    with tempfile.TemporaryDirectory() as scratch_dir:
        unsigned_path = os.path.join(
            FIXTURES_DIR, "extensionless-files", "svg")
        signed_path = os.path.join(scratch_dir, "signed_output.svg")

        builder = Builder(self.manifestDefinition)
        with open(unsigned_path, 'rb') as unsigned_stream, \
                open(signed_path, 'wb') as signed_stream:
            builder.sign(
                signer=self.signer,
                format="svg",  # format given as an extension, not a MIME type
                source=unsigned_stream,
                dest=signed_stream
            )

        # A signed asset must exist on disk
        self.assertTrue(os.path.exists(signed_path))

        # Round-trip: the manifest embedded in the output must be readable
        with open(signed_path, "rb") as signed_file:
            reader = Reader("image/svg+xml", signed_file)
            manifest_json = reader.json()
            self.assertIn("Python Test", manifest_json)
            self.assertNotIn("validation_status", manifest_json)

def test_sign_file_callback_signer(self):
"""Test signing a file using the sign_file method."""

Expand Down Expand Up @@ -1447,5 +1591,50 @@ def test_sign_file(self):
os.remove(output_path)


class TestHelpers(unittest.TestCase):
    """Tests for module-level helper functions."""

    def test_guess_mime_type_using_magic_number(self):
        """Check magic-number detection against each extensionless fixture.

        Every fixture file is named after the extension it should be
        detected as, so the file name doubles as the expected extension.
        """
        fixtures_root = os.path.join(FIXTURES_DIR, "extensionless-files")

        # Fixture name (== expected extension) -> expected MIME type
        expected_mime_by_fixture = {
            "svg": 'image/svg+xml',
            "png": 'image/png',
            "jpg": 'image/jpeg',
            "gif": 'image/gif',
            "heic": 'image/heic',
            "tiff": 'image/tiff',
            "webp": 'image/webp',
            "avif": 'image/avif',
            "mp4": 'video/mp4',
            "avi": 'video/x-msvideo',
            "mp3": 'audio/mpeg',
            "m4a": 'audio/mp4',
            "wav": 'audio/wav',
            "pdf": 'application/pdf',
        }

        for filename, mime in expected_mime_by_fixture.items():
            expected_result = (filename, mime)
            with self.subTest(filename=filename):
                result = _guess_mime_type_using_magic_number(
                    os.path.join(fixtures_root, filename))

                # The detected (extension, mime) pair must match exactly
                self.assertIsNotNone(result, f"Failed to detect type for {filename}")
                self.assertEqual(result, expected_result,
                                 f"Expected {expected_result} for {filename}, got {result}")

                # The detected extension must equal the fixture's name
                expected_extension = filename
                self.assertEqual(result[0], expected_extension,
                                 f"Extension mismatch for {filename}: expected {expected_extension}, got {result[0]}")

        # A path that does not exist yields None instead of raising
        missing_path = os.path.join(fixtures_root, "non_existent_file")
        result = _guess_mime_type_using_magic_number(missing_path)
        self.assertIsNone(result, "Should return None for non-existent file")


if __name__ == '__main__':
unittest.main()