Skip to content

Commit bf46940

Browse files
committed
known_files with None value
1 parent 0cbdbee commit bf46940

File tree

6 files changed

+39
-32
lines changed

6 files changed

+39
-32
lines changed

bioimageio/spec/_internal/io.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import collections.abc
43
import hashlib
54
import io
65
import sys
@@ -833,20 +832,15 @@ def _validate_sha256(self) -> Self:
833832
def validate_sha256(self):
834833
context = get_validation_context()
835834
if (src_str := str(self.source)) in context.known_files:
836-
if isinstance(context.known_files, collections.abc.Mapping):
837-
actual_sha = context.known_files[src_str]
838-
else:
839-
return
835+
actual_sha = context.known_files[src_str]
840836
else:
841837
local_source = download(self.source, sha256=self.sha256).path
842-
if isinstance(context.known_files, collections.abc.Mapping):
843-
actual_sha = get_sha256(local_source)
844-
context.known_files[src_str] = actual_sha
845-
else:
846-
context.known_files.add(src_str)
847-
return
838+
actual_sha = get_sha256(local_source)
839+
context.known_files[src_str] = actual_sha
848840

849-
if self.sha256 == actual_sha:
841+
if actual_sha is None:
842+
return
843+
elif self.sha256 == actual_sha:
850844
pass
851845
elif self.sha256 is None or context.update_hashes:
852846
self.sha256 = actual_sha

bioimageio/spec/_internal/type_guards.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import collections.abc
2-
from typing import Any, Dict, List, Mapping, Sequence, Tuple
2+
from typing import Any, Dict, List, Mapping, Sequence, Set, Tuple
33

44
import numpy as np
55
from numpy.typing import NDArray
@@ -11,6 +11,11 @@ def is_dict(v: Any) -> TypeGuard[Dict[Any, Any]]:
1111
return isinstance(v, dict)
1212

1313

14+
def is_set(v: Any) -> TypeGuard[Set[Any]]:
15+
"""to avoid Set[Unknown]"""
16+
return isinstance(v, set)
17+
18+
1419
def is_kwargs(v: Any) -> TypeGuard[Dict[str, Any]]:
1520
return isinstance(v, dict) and all(
1621
isinstance(k, str) for k in v # pyright: ignore[reportUnknownVariableType]

bioimageio/spec/_internal/url.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ def _validate_url_impl(
2323
) -> pydantic.HttpUrl:
2424

2525
url = str(url)
26+
context = get_validation_context()
27+
if url in context.known_files:
28+
with context.replace(perform_io_checks=False):
29+
return ( # pyright: ignore[reportUnknownVariableType]
30+
# TODO: remove pyright ignore for pydantic > 2.9
31+
pydantic.HttpUrl(url) # pyright: ignore[reportCallIssue]
32+
)
33+
2634
val_url = url
2735

2836
if url.startswith("http://example.com") or url.startswith("https://example.com"):
@@ -113,6 +121,7 @@ def _validate_url_impl(
113121
else:
114122
assert_never(request_mode)
115123

124+
context.known_files[url] = None
116125
return ( # pyright: ignore[reportUnknownVariableType]
117126
# TODO: remove pyright ignore for pydantic > 2.9
118127
pydantic.HttpUrl(url) # pyright: ignore[reportCallIssue]
@@ -128,10 +137,7 @@ class HttpUrl(RootHttpUrl):
128137
def _after_validator(self):
129138
self = super()._after_validator()
130139
context = get_validation_context()
131-
if (
132-
context.perform_io_checks
133-
and str(self._validated) not in context.known_files
134-
):
140+
if context.perform_io_checks:
135141
self._validated = _validate_url(self._validated)
136142
self._exists = True
137143

bioimageio/spec/_internal/validation_context.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from copy import copy
55
from dataclasses import dataclass, field
66
from pathlib import Path
7-
from typing import Dict, List, Literal, Optional, Set, Union
7+
from typing import Dict, List, Literal, Optional, Union
88
from urllib.parse import urlsplit, urlunsplit
99
from zipfile import ZipFile
1010

@@ -28,7 +28,7 @@ class ValidationContextBase:
2828
2929
Existence of local absolute file paths is still being checked."""
3030

31-
known_files: Union[Set[str], Dict[str, Sha256]] = field(default_factory=dict)
31+
known_files: Dict[str, Optional[Sha256]] = field(default_factory=dict)
3232
"""Allows to bypass download and hashing of referenced files."""
3333

3434
update_hashes: bool = False
@@ -112,7 +112,7 @@ def replace( # TODO: probably use __replace__ when py>=3.13
112112
log_warnings: Optional[bool] = None,
113113
file_name: Optional[str] = None,
114114
perform_io_checks: Optional[bool] = None,
115-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
115+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
116116
raise_errors: Optional[bool] = None,
117117
update_hashes: Optional[bool] = None,
118118
) -> Self:

bioimageio/spec/_io.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from typing import Dict, Literal, Optional, Set, TextIO, Union, cast, overload
2+
from typing import Dict, Literal, Optional, TextIO, Union, cast, overload
33
from zipfile import ZipFile
44

55
from loguru import logger
@@ -35,7 +35,7 @@ def load_description(
3535
*,
3636
format_version: Literal["latest"],
3737
perform_io_checks: Optional[bool] = None,
38-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
38+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
3939
sha256: Optional[Sha256] = None,
4040
) -> Union[LatestResourceDescr, InvalidDescr]: ...
4141

@@ -47,7 +47,7 @@ def load_description(
4747
*,
4848
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
4949
perform_io_checks: Optional[bool] = None,
50-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
50+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
5151
sha256: Optional[Sha256] = None,
5252
) -> Union[ResourceDescr, InvalidDescr]: ...
5353

@@ -58,7 +58,7 @@ def load_description(
5858
*,
5959
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
6060
perform_io_checks: Optional[bool] = None,
61-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
61+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
6262
sha256: Optional[Sha256] = None,
6363
) -> Union[ResourceDescr, InvalidDescr]:
6464
"""load a bioimage.io resource description
@@ -73,6 +73,8 @@ def load_description(
7373
absolute file paths is still being checked.
7474
known_files: Allows to bypass download and hashing of referenced files
7575
(even if perform_io_checks is True).
76+
Checked files will be added to this dictionary
77+
with their SHA-256 value.
7678
sha256: Optional SHA-256 value of **source**
7779
7880
Returns:
@@ -107,7 +109,7 @@ def load_model_description(
107109
*,
108110
format_version: Literal["latest"],
109111
perform_io_checks: Optional[bool] = None,
110-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
112+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
111113
sha256: Optional[Sha256] = None,
112114
) -> ModelDescr: ...
113115

@@ -119,7 +121,7 @@ def load_model_description(
119121
*,
120122
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
121123
perform_io_checks: Optional[bool] = None,
122-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
124+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
123125
sha256: Optional[Sha256] = None,
124126
) -> AnyModelDescr: ...
125127

@@ -130,7 +132,7 @@ def load_model_description(
130132
*,
131133
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
132134
perform_io_checks: Optional[bool] = None,
133-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
135+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
134136
sha256: Optional[Sha256] = None,
135137
) -> AnyModelDescr:
136138
"""same as `load_description`, but additionally ensures that the loaded
@@ -156,7 +158,7 @@ def load_dataset_description(
156158
*,
157159
format_version: Literal["latest"],
158160
perform_io_checks: Optional[bool] = None,
159-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
161+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
160162
sha256: Optional[Sha256] = None,
161163
) -> DatasetDescr: ...
162164

@@ -168,7 +170,7 @@ def load_dataset_description(
168170
*,
169171
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
170172
perform_io_checks: Optional[bool] = None,
171-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
173+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
172174
sha256: Optional[Sha256] = None,
173175
) -> AnyDatasetDescr: ...
174176

@@ -179,7 +181,7 @@ def load_dataset_description(
179181
*,
180182
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
181183
perform_io_checks: Optional[bool] = None,
182-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
184+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
183185
sha256: Optional[Sha256] = None,
184186
) -> AnyDatasetDescr:
185187
"""same as `load_description`, but additionally ensures that the loaded
@@ -231,7 +233,7 @@ def load_description_and_validate_format_only(
231233
*,
232234
format_version: Union[FormatVersionPlaceholder, str] = DISCOVER,
233235
perform_io_checks: Optional[bool] = None,
234-
known_files: Optional[Union[Set[str], Dict[str, Sha256]]] = None,
236+
known_files: Optional[Dict[str, Optional[Sha256]]] = None,
235237
sha256: Optional[Sha256] = None,
236238
) -> ValidationSummary:
237239
"""same as `load_description`, but only return the validation summary.

changelog.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ This changelog includes implementation details and may reference the [changes to
1616
- new validation context `disable_cache` (equivalent to an empty `cache_path` in `settings`)
1717
circumvents caching to disk and keeps downloads in memory only
1818
- new setting `allow_pickle` to control `numpy.load`/`numpy.save` behavior
19-
- allow the `ValidationContext`'s `known_files` to be a set of file names to only check for file existence without comparing file hashes.
19+
- allow the `ValidationContext`'s `known_files` to include `None` values (instead of known SHA values) to only check for file existence without comparing file hashes.
2020

2121
#### bioimageio.spec 0.5.4.1
2222

0 commit comments

Comments
 (0)