Skip to content

Commit f22bcad

Browse files
author
Emrick Donadei
committed
Add Manifest.from_signature() to extract manifest from existing signatures
This method enables reading a manifest from a signature file without performing cryptographic verification. This is the foundation for incremental re-hashing, where we need to know which files were previously signed in order to determine which files need re-hashing.

The method:
- Reads and parses the Sigstore bundle JSON format
- Extracts the DSSE envelope payload
- Decodes the base64-encoded payload
- Validates manifest integrity (root digest matches resources)
- Returns a Manifest object

Includes comprehensive tests covering:
- Valid manifest extraction
- Rejection of inconsistent manifests
- Error handling for missing files, invalid JSON, and missing envelopes

Related to issue #160 - API for incremental model re-hashing
1 parent 460f0e8 commit f22bcad

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed

src/model_signing/manifest.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@
3939
"""
4040

4141
import abc
42+
import base64
4243
from collections.abc import Iterable, Iterator
4344
import dataclasses
45+
import json
4446
import pathlib
4547
import sys
4648
from typing import Any, Final
@@ -466,3 +468,53 @@ def serialization_type(self) -> dict[str, Any]:
466468
manifest so that signature verification can use the same method.
467469
"""
468470
return self._serialization_type.serialization_parameters
471+
472+
@classmethod
473+
def from_signature(cls, signature_path: pathlib.Path) -> Self:
474+
"""Extracts a manifest from an existing signature file.
475+
476+
This method reads a signature file (Sigstore bundle) and extracts the
477+
manifest without performing cryptographic verification. This is useful
478+
for incremental re-hashing where you need to know what files were
479+
previously signed without verifying the signature.
480+
481+
Args:
482+
signature_path: Path to the signature file to read.
483+
484+
Returns:
485+
A Manifest object representing the signed model.
486+
487+
Raises:
488+
ValueError: If the signature file cannot be parsed or doesn't
489+
contain a valid manifest.
490+
FileNotFoundError: If the signature file doesn't exist.
491+
"""
492+
# Avoid circular import by importing here
493+
from model_signing._signing import signing
494+
495+
# Read the signature file
496+
content = signature_path.read_text(encoding="utf-8")
497+
bundle_dict = json.loads(content)
498+
499+
# Extract the DSSE envelope payload
500+
if "dsseEnvelope" in bundle_dict:
501+
# This is a protobuf-based bundle
502+
envelope = bundle_dict["dsseEnvelope"]
503+
elif "dsse_envelope" in bundle_dict:
504+
# Alternative snake_case naming
505+
envelope = bundle_dict["dsse_envelope"]
506+
else:
507+
raise ValueError(
508+
"Signature file does not contain a DSSE envelope"
509+
)
510+
511+
# Decode the payload (it's base64 encoded)
512+
payload_b64 = envelope.get("payload")
513+
if not payload_b64:
514+
raise ValueError("DSSE envelope does not contain a payload")
515+
516+
payload_bytes = base64.b64decode(payload_b64)
517+
payload_dict = json.loads(payload_bytes)
518+
519+
# Use the existing function to convert DSSE payload to manifest
520+
return signing.dsse_payload_to_manifest(payload_dict)

tests/manifest_test.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,171 @@ def test_manifest_has_the_correct_resource_descriptors(self):
206206
assert descriptors[0].digest.digest_value == b"hash1"
207207
assert descriptors[1].digest.digest_value == b"hash2"
208208
assert descriptors[2].digest.digest_value == b"hash3"
209+
210+
211+
class TestManifestFromSignature:
    """Tests for `Manifest.from_signature` on hand-built, unsigned bundles.

    The bundles written here carry a fake signature: `from_signature` does
    not verify signatures, so only the envelope payload matters.
    """

    # Well-formed SHA256 hex digests (64 chars each) for the two resources.
    _DIGEST1_HEX = "abcd1234" * 8
    _DIGEST2_HEX = "5678dcba" * 8

    def _payload(self, root_digest_hex):
        """Returns an in-toto statement for two files with the given root."""
        return {
            "_type": "https://in-toto.io/Statement/v1",
            "subject": [
                {"name": "test_model", "digest": {"sha256": root_digest_hex}}
            ],
            "predicateType": "https://model_signing/signature/v1.0",
            "predicate": {
                "serialization": {
                    "method": "files",
                    "hash_type": "sha256",
                    "allow_symlinks": False,
                    "ignore_paths": [],
                },
                "resources": [
                    {
                        "name": "file1.txt",
                        "algorithm": "sha256",
                        "digest": self._DIGEST1_HEX,
                    },
                    {
                        "name": "file2.txt",
                        "algorithm": "sha256",
                        "digest": self._DIGEST2_HEX,
                    },
                ],
            },
        }

    def _write_bundle(self, tmp_path, payload_dict):
        """Wraps the payload in a Sigstore bundle file and returns its path."""
        import base64
        import json

        # DSSE envelopes carry the statement as base64-encoded JSON.
        payload_json = json.dumps(payload_dict)
        payload_b64 = base64.b64encode(payload_json.encode("utf-8")).decode(
            "utf-8"
        )

        bundle_dict = {
            "mediaType": "application/vnd.dev.sigstore.bundle.v0.3+json",
            "verificationMaterial": {
                "publicKey": {"hint": "test"},
                "tlogEntries": [],
            },
            "dsseEnvelope": {
                "payload": payload_b64,
                "payloadType": "application/vnd.in-toto+json",
                "signatures": [{"sig": "fake_signature"}],
            },
        }

        sig_file = tmp_path / "test.sig"
        sig_file.write_text(json.dumps(bundle_dict), encoding="utf-8")
        return sig_file

    def test_from_signature_rejects_inconsistent_manifest(self, tmp_path):
        # The subject digest doesn't match the hash of the resources.
        bogus_root_digest = (
            "0b8a5a8c8e8f1a8b8c8d8e8f2a8b8c8d"
            "8e8f3a8b8c8d8e8f4a8b8c8d8e8f5a8b"
        )
        sig_file = self._write_bundle(
            tmp_path, self._payload(bogus_root_digest)
        )

        # Verify that inconsistent manifest is rejected
        with pytest.raises(ValueError, match="Manifest is inconsistent"):
            manifest.Manifest.from_signature(sig_file)

    def test_from_signature_extracts_valid_manifest(self, tmp_path):
        import hashlib

        # Compute root digest (SHA256 of both digests concatenated)
        hasher = hashlib.sha256()
        hasher.update(bytes.fromhex(self._DIGEST1_HEX))
        hasher.update(bytes.fromhex(self._DIGEST2_HEX))
        root_digest = hasher.hexdigest()

        sig_file = self._write_bundle(tmp_path, self._payload(root_digest))

        # Extract manifest
        extracted_manifest = manifest.Manifest.from_signature(sig_file)

        # Verify the manifest has the correct files
        descriptors = list(extracted_manifest.resource_descriptors())
        assert len(descriptors) == 2
        assert descriptors[0].identifier == "file1.txt"
        assert descriptors[1].identifier == "file2.txt"
        assert descriptors[0].digest.digest_hex == self._DIGEST1_HEX
        assert descriptors[1].digest.digest_hex == self._DIGEST2_HEX
        assert extracted_manifest.model_name == "test_model"

    def test_from_signature_file_not_found(self, tmp_path):
        non_existent = tmp_path / "does_not_exist.sig"
        with pytest.raises(FileNotFoundError):
            manifest.Manifest.from_signature(non_existent)

    def test_from_signature_invalid_json(self, tmp_path):
        import json

        sig_file = tmp_path / "invalid.sig"
        sig_file.write_text("not valid json", encoding="utf-8")
        with pytest.raises(json.JSONDecodeError):
            manifest.Manifest.from_signature(sig_file)

    def test_from_signature_missing_envelope(self, tmp_path):
        # An empty JSON object parses fine but has no envelope key.
        sig_file = tmp_path / "missing_envelope.sig"
        sig_file.write_text("{}", encoding="utf-8")
        with pytest.raises(
            ValueError, match="does not contain a DSSE envelope"
        ):
            manifest.Manifest.from_signature(sig_file)

0 commit comments

Comments
 (0)