|
48 | 48 | from typing import Optional |
49 | 49 |
|
50 | 50 | from model_signing import hashing |
| 51 | +from model_signing import manifest |
51 | 52 | from model_signing._signing import sign_certificate as certificate |
52 | 53 | from model_signing._signing import sign_ec_key as ec_key |
53 | 54 | from model_signing._signing import sign_sigstore as sigstore |
@@ -75,6 +76,52 @@ def sign(model_path: hashing.PathLike, signature_path: hashing.PathLike): |
75 | 76 | Config().sign(model_path, signature_path) |
76 | 77 |
|
77 | 78 |
|
def sign_incremental(
    model_path: hashing.PathLike,
    old_signature_path: hashing.PathLike,
    new_signature_path: hashing.PathLike,
    *,
    files_to_hash: Optional[Iterable[hashing.PathLike]] = None,
):
    """Re-signs a model, reusing digests recorded in an earlier signature.

    Convenience entry point for large models in which only a few files have
    changed since the last signing. A default `Config` is created and its
    `sign_incremental` method does the work: digests of unchanged files are
    taken from the previous signature, and only new or explicitly listed
    files are hashed again.

    In this default configuration we sign using Sigstore.

    Usage example:
        # User modified README.md in a 500GB model
        sign_incremental(
            model_path="huge-model/",
            old_signature_path="model.sig.old",
            new_signature_path="model.sig.new",
            files_to_hash=["huge-model/README.md"]
        )

    Args:
        model_path: The path to the model to sign.
        old_signature_path: The path to the previous signature, whose
          manifest supplies the digests to reuse.
        new_signature_path: The path where the new signature will be written.
        files_to_hash: Optional files that changed and must be re-hashed.
          If None, only files absent from the old signature are hashed and
          every other digest is reused. This function does not detect
          modifications itself; use git diff or a similar tool to build
          the list.

    Raises:
        FileNotFoundError: If old_signature_path doesn't exist.
        ValueError: If old_signature_path cannot be parsed.
    """
    # A fresh Config carries the library defaults; all real work is delegated.
    default_config = Config()
    default_config.sign_incremental(
        model_path,
        old_signature_path,
        new_signature_path,
        files_to_hash=files_to_hash,
    )
| 123 | + |
| 124 | + |
78 | 125 | class Config: |
79 | 126 | """Configuration to use when signing models. |
80 | 127 |
|
@@ -109,6 +156,57 @@ def sign( |
109 | 156 | signature = self._signer.sign(payload) |
110 | 157 | signature.write(pathlib.Path(signature_path)) |
111 | 158 |
|
| 159 | + def sign_incremental( |
| 160 | + self, |
| 161 | + model_path: hashing.PathLike, |
| 162 | + old_signature_path: hashing.PathLike, |
| 163 | + new_signature_path: hashing.PathLike, |
| 164 | + *, |
| 165 | + files_to_hash: Optional[Iterable[hashing.PathLike]] = None, |
| 166 | + ): |
| 167 | + """Signs a model incrementally using the current configuration. |
| 168 | +
|
| 169 | + This method extracts the manifest from an existing signature and |
| 170 | + configures incremental hashing to reuse digests for unchanged files. |
| 171 | + Only new or modified files are re-hashed, providing significant |
| 172 | + performance improvements for large models. |
| 173 | +
|
| 174 | + Args: |
| 175 | + model_path: The path to the model to sign. |
| 176 | + old_signature_path: The path to the previous signature. |
| 177 | + new_signature_path: The path where the new signature will be written. |
| 178 | + files_to_hash: Optional list of files that changed and need to be |
| 179 | + re-hashed. If None, only new files will be hashed. |
| 180 | +
|
| 181 | + Raises: |
| 182 | + FileNotFoundError: If old_signature_path doesn't exist. |
| 183 | + ValueError: If old_signature_path cannot be parsed. |
| 184 | + """ |
| 185 | + # Extract manifest from old signature |
| 186 | + old_manifest = manifest.Manifest.from_signature( |
| 187 | + pathlib.Path(old_signature_path) |
| 188 | + ) |
| 189 | + |
| 190 | + # Configure incremental hashing |
| 191 | + self._hashing_config.use_incremental_serialization(old_manifest) |
| 192 | + |
| 193 | + # Convert files_to_hash to pathlib.Path objects if provided |
| 194 | + paths_to_hash = None |
| 195 | + if files_to_hash is not None: |
| 196 | + paths_to_hash = [pathlib.Path(f) for f in files_to_hash] |
| 197 | + |
| 198 | + # Hash the model incrementally |
| 199 | + new_manifest = self._hashing_config.hash( |
| 200 | + model_path, files_to_hash=paths_to_hash |
| 201 | + ) |
| 202 | + |
| 203 | + # Sign the new manifest |
| 204 | + if not self._signer: |
| 205 | + self.use_sigstore_signer() |
| 206 | + payload = signing.Payload(new_manifest) |
| 207 | + signature = self._signer.sign(payload) |
| 208 | + signature.write(pathlib.Path(new_signature_path)) |
| 209 | + |
112 | 210 | def set_hashing_config(self, hashing_config: hashing.Config) -> Self: |
113 | 211 | """Sets the new configuration for hashing models. |
114 | 212 |
|
|
0 commit comments