Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions tests/repository_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@
from __future__ import annotations

import datetime
import hashlib
import logging
import os
import tempfile
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from urllib import parse

import securesystemslib.hash as sslib_hash
from securesystemslib.signer import CryptoSigner, Signer

from tuf.api.exceptions import DownloadHTTPError
Expand Down Expand Up @@ -80,6 +80,8 @@

SPEC_VER = ".".join(SPECIFICATION_VERSION)

_DEFAULT_HASH_ALGORITHM = "sha256"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it's okay to make this a local default, and not re-use the one newly added to _payload.py, so that we can keep the latter internal.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, I'd be fine with just a magic value as well: it's not really even a default, it's just what this repository decided to use



@dataclass
class FetchTracker:
def _compute_hashes_and_length(
    self, role: str
) -> tuple[dict[str, str], int]:
    """Return (hashes, length) for a role's serialized metadata.

    The serialized bytes are obtained via self.fetch_metadata(); the
    hash dict maps the repository's chosen algorithm name to the hex
    digest of those bytes.
    """
    data = self.fetch_metadata(role)
    digest = hashlib.new(_DEFAULT_HASH_ALGORITHM)
    digest.update(data)
    return {_DEFAULT_HASH_ALGORITHM: digest.hexdigest()}, len(data)

def update_timestamp(self) -> None:
Expand Down
5 changes: 1 addition & 4 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from typing import ClassVar

from securesystemslib import exceptions as sslib_exceptions
from securesystemslib import hash as sslib_hash
from securesystemslib.signer import (
CryptoSigner,
Key,
Expand Down Expand Up @@ -958,9 +957,7 @@ def test_targetfile_from_file(self) -> None:
# Test with a non-existing file
file_path = os.path.join(self.repo_dir, Targets.type, "file123.txt")
with self.assertRaises(FileNotFoundError):
TargetFile.from_file(
file_path, file_path, [sslib_hash.DEFAULT_HASH_ALGORITHM]
)
TargetFile.from_file(file_path, file_path, ["sha256"])

# Test with an unsupported algorithm
file_path = os.path.join(self.repo_dir, Targets.type, "file1.txt")
Expand Down
59 changes: 39 additions & 20 deletions tuf/api/_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

import abc
import fnmatch
import hashlib
import io
import logging
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import (
Expand All @@ -21,7 +23,6 @@
)

from securesystemslib import exceptions as sslib_exceptions
from securesystemslib import hash as sslib_hash
from securesystemslib.signer import Key, Signature

from tuf.api.exceptions import LengthOrHashMismatchError, UnsignedMetadataError
Expand All @@ -34,6 +35,9 @@
_TARGETS = "targets"
_TIMESTAMP = "timestamp"

_DEFAULT_HASH_ALGORITHM = "sha256"
_BLAKE_HASH_ALGORITHM = "blake2b-256"

# We aim to support SPECIFICATION_VERSION and require the input metadata
# files to have the same major version (the first number) as ours.
SPECIFICATION_VERSION = ["1", "0", "31"]
Expand All @@ -45,6 +49,30 @@
T = TypeVar("T", "Root", "Timestamp", "Snapshot", "Targets")


def _hash(algo: str) -> Any:  # noqa: ANN401
    """Return a new hash object for *algo*.

    Supports the custom "blake2b-256" algorithm name (BLAKE2b with a
    32-byte digest) on top of anything ``hashlib.new()`` accepts.

    Python's hashlib does not make its Hash type public, hence the
    ``Any`` return annotation (with the linter's forgiveness).
    """
    if algo != _BLAKE_HASH_ALGORITHM:
        return hashlib.new(algo)
    return hashlib.blake2b(digest_size=32)


def _file_hash(f: IO[bytes], algo: str) -> Any:  # noqa: ANN401
    """Hash the full contents of file object *f* with *algo*.

    The file is rewound first so the digest always covers the whole
    file; the populated hash object is returned.
    """
    f.seek(0)
    if sys.version_info < (3, 11):
        # Older Pythons lack hashlib.file_digest: read in chunks instead.
        # Chunk size is taken from the previously used and now deprecated
        # securesystemslib.hash.digest_fileobject.
        digest = _hash(algo)
        while chunk := f.read(4096):
            digest.update(chunk)
        return digest

    # NOTE: mypy does not allow IO[bytes] for f here (possibly related
    # to python/typing#659); the ignore seems fair.
    return hashlib.file_digest(f, lambda: _hash(algo))  # type: ignore[arg-type]


class Signed(metaclass=abc.ABCMeta):
"""A base class for the signed part of TUF metadata.

Expand Down Expand Up @@ -664,19 +692,15 @@ def _verify_hashes(
data: bytes | IO[bytes], expected_hashes: dict[str, str]
) -> None:
"""Verify that the hash of ``data`` matches ``expected_hashes``."""
is_bytes = isinstance(data, bytes)
for algo, exp_hash in expected_hashes.items():
try:
if is_bytes:
digest_object = sslib_hash.digest(algo)
if isinstance(data, bytes):
digest_object = _hash(algo)
digest_object.update(data)
else:
# if data is not bytes, assume it is a file object
digest_object = sslib_hash.digest_fileobject(data, algo)
except (
sslib_exceptions.UnsupportedAlgorithmError,
sslib_exceptions.FormatError,
) as e:
digest_object = _file_hash(data, algo)
except (ValueError, TypeError) as e:
raise LengthOrHashMismatchError(
f"Unsupported algorithm '{algo}'"
) from e
Expand Down Expand Up @@ -731,21 +755,16 @@ def _get_length_and_hashes(
hashes = {}

if hash_algorithms is None:
hash_algorithms = [sslib_hash.DEFAULT_HASH_ALGORITHM]
hash_algorithms = [_DEFAULT_HASH_ALGORITHM]

for algorithm in hash_algorithms:
try:
if isinstance(data, bytes):
digest_object = sslib_hash.digest(algorithm)
digest_object = _hash(algorithm)
digest_object.update(data)
else:
digest_object = sslib_hash.digest_fileobject(
data, algorithm
)
except (
sslib_exceptions.UnsupportedAlgorithmError,
sslib_exceptions.FormatError,
) as e:
digest_object = _file_hash(data, algorithm)
except (ValueError, TypeError) as e:
Copy link
Member

@jku jku Mar 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the different functionality is a little annoying here (you need to call update on one digest_object but not the other)...

  • If the _hash and _file_hash functions directly returned the hexdigest this code would be simpler and the functions would be properly annotated
  • on the other hand then _hash() would need a data argument (meaning you would have to split the lambda needed by hashlib.file_digest() into another function)

I don't really know what is the simplest... up to you

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does 535a189 look better? I don't have very strong feelings about either way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No passionate argument here either. I think I like this new version better.

raise ValueError(f"Unsupported algorithm '{algorithm}'") from e

hashes[algorithm] = digest_object.hexdigest()
Expand Down Expand Up @@ -1150,7 +1169,7 @@ def is_delegated_path(self, target_filepath: str) -> bool:
if self.path_hash_prefixes is not None:
# Calculate the hash of the filepath
# to determine in which bin to find the target.
digest_object = sslib_hash.digest(algorithm="sha256")
digest_object = hashlib.new(name="sha256")
digest_object.update(target_filepath.encode("utf-8"))
target_filepath_hash = digest_object.hexdigest()

Expand Down Expand Up @@ -1269,7 +1288,7 @@ def get_role_for_target(self, target_filepath: str) -> str:
target_filepath: URL path to a target file, relative to a base
targets URL.
"""
hasher = sslib_hash.digest(algorithm="sha256")
hasher = hashlib.new(name="sha256")
hasher.update(target_filepath.encode("utf-8"))

# We can't ever need more than 4 bytes (32 bits).
Expand Down