Skip to content

Commit a3194d8

Browse files
Emrick Donadei (edonadei)
authored and committed
Add sign_incremental() convenience API for incremental signing
Provides high-level convenience functions for incremental model signing that combine all the pieces: manifest extraction, incremental hashing, and signing.

Two levels of API:

1. Simple function API:

       sign_incremental(
           model_path="huge-model/",
           old_signature_path="model.sig.old",
           new_signature_path="model.sig.new",
           files_to_hash=["huge-model/README.md"]
       )

2. Configurable class API:

       Config().use_elliptic_key_signer(private_key="key").sign_incremental(
           model_path="huge-model/",
           old_signature_path="model.sig.old",
           new_signature_path="model.sig.new",
           files_to_hash=["huge-model/README.md"]
       )

Both APIs:
- Extract the manifest from the old signature automatically
- Configure incremental hashing
- Hash only changed/new files
- Sign the new manifest
- Write the new signature

Also added a set_allow_symlinks() method to IncrementalSerializer to maintain compatibility with the hashing Config class, which calls this method before serialization.

This makes it trivial for users to incrementally sign large models where only a few files changed, avoiding hours of re-hashing.

Related to issue #160 - API for incremental model re-hashing

Signed-off-by: Emrick Donadei <[email protected]>
1 parent afcc162 commit a3194d8

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

src/model_signing/_serialization/incremental.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ def __init__(
9494
)
9595
self._is_blake3 = hasher.digest_name == "blake3"
9696

97+
def set_allow_symlinks(self, allow_symlinks: bool) -> None:
    """Update the symlink policy and rebuild the serialization description.

    The serialization description stores the symlink setting next to the
    digest name and the ignored paths, so it is recreated here to stay in
    sync with the new value.

    Args:
        allow_symlinks: Whether following symlinks is allowed.
    """
    self._allow_symlinks = allow_symlinks
    # A hasher is built for an empty path only to read its digest name; the
    # hasher itself is not used to hash anything here.
    digest_name = self._hasher_factory(pathlib.Path()).digest_name
    self._serialization_description = manifest._FileSerialization(
        digest_name, self._allow_symlinks, self._ignore_paths
    )
104+
97105
@override
98106
def serialize(
99107
self,

src/model_signing/signing.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from typing import Optional
4949

5050
from model_signing import hashing
51+
from model_signing import manifest
5152
from model_signing._signing import sign_certificate as certificate
5253
from model_signing._signing import sign_ec_key as ec_key
5354
from model_signing._signing import sign_sigstore as sigstore
@@ -75,6 +76,52 @@ def sign(model_path: hashing.PathLike, signature_path: hashing.PathLike):
7576
Config().sign(model_path, signature_path)
7677

7778

79+
def sign_incremental(
    model_path: hashing.PathLike,
    old_signature_path: hashing.PathLike,
    new_signature_path: hashing.PathLike,
    *,
    files_to_hash: Optional[Iterable[hashing.PathLike]] = None,
):
    """Signs a model incrementally with the default signing configuration.

    Intended for large models where only a few files changed: digests for
    unchanged files are reused from the previous signature and only new or
    explicitly listed files are hashed again, instead of re-hashing the
    whole model.

    This is a thin wrapper that builds a default `Config` and delegates to
    `Config.sign_incremental`. In this default configuration we sign using
    Sigstore.

    Usage example:
        # User modified README.md in a 500GB model
        sign_incremental(
            model_path="huge-model/",
            old_signature_path="model.sig.old",
            new_signature_path="model.sig.new",
            files_to_hash=["huge-model/README.md"]
        )

    Args:
        model_path: The path to the model to sign.
        old_signature_path: The path to the previous signature. The manifest
          stored in this signature is extracted and used as the source of
          reusable digests.
        new_signature_path: The path where the new signature will be written.
        files_to_hash: Optional iterable of files that changed and need to
          be re-hashed. If None, only new files (not in the old signature)
          will be hashed; existing files keep their previous digests. To
          detect changed files, use git diff or similar tools.

    Raises:
        FileNotFoundError: If old_signature_path doesn't exist.
        ValueError: If old_signature_path cannot be parsed.
    """
    default_config = Config()
    default_config.sign_incremental(
        model_path,
        old_signature_path,
        new_signature_path,
        files_to_hash=files_to_hash,
    )
123+
124+
78125
class Config:
79126
"""Configuration to use when signing models.
80127
@@ -109,6 +156,57 @@ def sign(
109156
signature = self._signer.sign(payload)
110157
signature.write(pathlib.Path(signature_path))
111158

159+
def sign_incremental(
    self,
    model_path: hashing.PathLike,
    old_signature_path: hashing.PathLike,
    new_signature_path: hashing.PathLike,
    *,
    files_to_hash: Optional[Iterable[hashing.PathLike]] = None,
):
    """Signs a model incrementally using the current configuration.

    The manifest embedded in an existing signature is loaded and handed to
    the hashing configuration so that digests of unchanged files can be
    reused. The model is then hashed, the resulting manifest is signed, and
    the signature is written to `new_signature_path`. If no signer has been
    configured yet, the Sigstore signer is selected.

    NOTE(review): this calls `use_incremental_serialization` on this
    object's hashing configuration; that setting presumably stays in
    effect for later calls on the same `Config` - confirm.

    Args:
        model_path: The path to the model to sign.
        old_signature_path: The path to the previous signature.
        new_signature_path: The path where the new signature will be written.
        files_to_hash: Optional iterable of files that changed and need to
          be re-hashed. If None, only new files will be hashed.

    Raises:
        FileNotFoundError: If old_signature_path doesn't exist.
        ValueError: If old_signature_path cannot be parsed.
    """
    # Recover the previously signed manifest and switch hashing to
    # incremental mode based on it.
    previous_manifest = manifest.Manifest.from_signature(
        pathlib.Path(old_signature_path)
    )
    self._hashing_config.use_incremental_serialization(previous_manifest)

    # Normalize the caller-supplied entries to pathlib.Path; None is passed
    # through unchanged.
    changed_paths = (
        None
        if files_to_hash is None
        else [pathlib.Path(entry) for entry in files_to_hash]
    )

    updated_manifest = self._hashing_config.hash(
        model_path, files_to_hash=changed_paths
    )

    # Fall back to the Sigstore signer when none was configured.
    if not self._signer:
        self.use_sigstore_signer()

    signed_payload = signing.Payload(updated_manifest)
    new_signature = self._signer.sign(signed_payload)
    new_signature.write(pathlib.Path(new_signature_path))
209+
112210
def set_hashing_config(self, hashing_config: hashing.Config) -> Self:
113211
"""Sets the new configuration for hashing models.
114212

0 commit comments

Comments
 (0)