Skip to content

Commit 46ab2cd

Browse files
committed
improve extract_resource_package
and use spec.resolve_rdf_source_and_type
1 parent ee09857 commit 46ab2cd

File tree

4 files changed

+74
-105
lines changed

4 files changed

+74
-105
lines changed

bioimageio/core/resource_io/common.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

bioimageio/core/resource_io/io_.py

Lines changed: 68 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,98 @@
11
import os
22
import pathlib
3+
import shutil
34
import warnings
5+
import zipfile
46
from copy import deepcopy
5-
from typing import Any, Dict, Optional, Sequence, Tuple, Union
7+
from typing import Dict, IO, Optional, Sequence, Tuple, Union
68
from zipfile import ZIP_DEFLATED, ZipFile
79

8-
from marshmallow import ValidationError, missing
10+
from marshmallow import missing
911

1012
from bioimageio import spec
1113
from bioimageio.core.resource_io.nodes import ResourceDescription
14+
from bioimageio.spec.io_ import resolve_rdf_source
1215
from bioimageio.spec.shared import raw_nodes
13-
from bioimageio.spec.shared.common import get_class_name_from_type
16+
from bioimageio.spec.shared.common import BIOIMAGEIO_CACHE_PATH, get_class_name_from_type
1417
from bioimageio.spec.shared.raw_nodes import ResourceDescription as RawResourceDescription
1518
from bioimageio.spec.shared.utils import PathToRemoteUriTransformer
1619
from . import nodes
17-
from .common import BIOIMAGEIO_CACHE_PATH, yaml
18-
from .utils import _download_uri_to_local_path, resolve_local_uri, resolve_raw_resource_description, resolve_uri
20+
from .utils import resolve_raw_resource_description, resolve_uri
1921

20-
21-
ROOT_PATH = "root_path"
2222
serialize_raw_resource_description = spec.io_.serialize_raw_resource_description
2323
save_raw_resource_description = spec.io_.save_raw_resource_description
2424

2525

26+
def extract_resource_package(
    source: Union[os.PathLike, IO, str, bytes, raw_nodes.URI]
) -> Tuple[dict, str, pathlib.Path]:
    """Extract a zipped resource package into BIOIMAGEIO_CACHE_PATH / "extracted_packages".

    Args:
        source: anything accepted by `resolve_rdf_source`; its resolved root is
            expected to be a zip archive (local path or remote URI).

    Returns:
        The first two values returned by `resolve_rdf_source` (passed through
        unchanged) and the folder the package was extracted to.

    Raises:
        NotImplementedError: if the resolved root is `bytes`.
        FileNotFoundError: if the extracted package contains no 'rdf.yaml'.
    """
    source, source_name, root = resolve_rdf_source(source)
    if isinstance(root, bytes):
        raise NotImplementedError("package source was bytes")

    cache_folder = BIOIMAGEIO_CACHE_PATH / "extracted_packages"
    cache_folder.mkdir(exist_ok=True, parents=True)

    download = None  # path of a temporary download that has to be cleaned up
    if isinstance(root, raw_nodes.URI):
        from urllib.request import urlretrieve

        package_path = cache_folder / root.scheme / root.authority / root.path.strip("/") / root.query
        if (package_path / "rdf.yaml").exists():
            # package was extracted before; skip download and extraction
            local_source = None
        else:
            download, _ = urlretrieve(str(root))
            local_source = download
    else:
        local_source = root
        # strip the top-most parent so the package path nests inside the cache folder
        # NOTE(review): assumes root is a pathlib.Path with at least one parent -- confirm
        package_path = cache_folder / root.relative_to(list(root.parents)[-1])

    try:
        if local_source is not None:
            with zipfile.ZipFile(local_source) as zf:
                zf.extractall(package_path)

        if not (package_path / "rdf.yaml").exists():
            raise FileNotFoundError(f"missing 'rdf.yaml' in {root} extracted from {download}")
    finally:
        # remove the temporary download even if extraction or validation failed
        if download is not None:
            try:
                os.remove(download)
            except OSError as e:
                warnings.warn(f"Could not remove download {download} due to {e}")

    assert isinstance(package_path, pathlib.Path)
    return source, source_name, package_path
67+
68+
2669
def _replace_relative_paths_for_remote_source(
27-
raw_rd: RawResourceDescription, source: Union[Any, str, raw_nodes.URI]
70+
raw_rd: RawResourceDescription, root: Union[pathlib.Path, raw_nodes.URI, bytes]
2871
) -> RawResourceDescription:
29-
if isinstance(source, raw_nodes.URI) or isinstance(source, str) and source.startswith("http"):
72+
if isinstance(root, raw_nodes.URI):
3073
# for a remote source relative paths are invalid; replace all relative file paths in source with URLs
31-
if isinstance(source, str):
32-
source = raw_nodes.URI(source)
33-
3474
warnings.warn(
35-
f"changing file paths in RDF to URIs due to a remote {source.scheme} source "
75+
f"changing file paths in RDF to URIs due to a remote {root.scheme} source "
3676
"(may result in an invalid node)"
3777
)
38-
raw_rd = PathToRemoteUriTransformer(remote_source=source).transform(raw_rd)
39-
raw_rd.root_path = pathlib.Path() # root_path cannot be URI
78+
raw_rd = PathToRemoteUriTransformer(remote_source=root).transform(raw_rd)
79+
root_path = pathlib.Path() # root_path cannot be URI
80+
elif isinstance(root, pathlib.Path):
81+
if zipfile.is_zipfile(root):
82+
_, _, root_path = extract_resource_package(root)
83+
else:
84+
root_path = root
85+
elif isinstance(root, bytes):
86+
raise NotImplementedError("root as bytes (io)")
87+
else:
88+
raise TypeError(root)
4089

90+
raw_rd.root_path = root_path
4191
return raw_rd
4292

4393

4494
def load_raw_resource_description(
45-
source: Union[os.PathLike, str, dict, raw_nodes.URI, RawResourceDescription]
95+
source: Union[dict, os.PathLike, IO, str, bytes, raw_nodes.URI]
4696
) -> RawResourceDescription:
4797
"""load a raw python representation from a BioImage.IO resource description file (RDF).
4898
Use `load_resource_description` for a more convenient representation.
@@ -53,12 +103,8 @@ def load_raw_resource_description(
53103
Returns:
54104
raw BioImage.IO resource
55105
"""
56-
if isinstance(source, RawResourceDescription):
57-
return source
58-
59-
data, type_ = resolve_rdf_source_and_type(source)
60-
raw_rd = spec.load_raw_resource_description(data, update_to_current_format=True)
61-
raw_rd = _replace_relative_paths_for_remote_source(raw_rd, source)
106+
raw_rd = spec.load_raw_resource_description(source, update_to_current_format=True)
107+
raw_rd = _replace_relative_paths_for_remote_source(raw_rd, raw_rd.root_path)
62108
return raw_rd
63109

64110

@@ -186,26 +232,6 @@ def _get_tmp_package_path(raw_rd: RawResourceDescription, weights_priority_order
186232
return package_path
187233

188234

189-
def extract_resource_package(source: Union[os.PathLike, str, raw_nodes.URI]) -> pathlib.Path:
    """Unzip `source` below BIOIMAGEIO_CACHE_PATH and return the path of the RDF file it contains.

    The archive is extracted to "extracted_packages/<archive stem>" inside the
    cache. The first existing file among 'rdf.yaml', 'model.yaml', 'rdf.yml'
    and 'model.yml' is returned; FileNotFoundError is raised if none exists.
    """
    local_source = resolve_uri(source)
    assert isinstance(local_source, pathlib.Path)

    extraction_root = BIOIMAGEIO_CACHE_PATH / "extracted_packages"
    extraction_root.mkdir(exist_ok=True, parents=True)
    package_path = extraction_root / local_source.stem

    with ZipFile(local_source) as archive:
        archive.extractall(package_path)

    # candidate RDF file names in order of preference
    candidates = (package_path / name for name in ("rdf.yaml", "model.yaml", "rdf.yml", "model.yml"))
    rdf_path = next((candidate for candidate in candidates if candidate.exists()), None)
    if rdf_path is None:
        raise FileNotFoundError(local_source / "rdf.yaml")

    return rdf_path
207-
208-
209235
def make_zip(
210236
path: os.PathLike, content: Dict[str, Union[str, pathlib.Path]], *, compression: int, compression_level: int
211237
) -> None:
@@ -225,53 +251,3 @@ def make_zip(
225251
myzip.writestr(arc_name, file_or_str_content)
226252
else:
227253
myzip.write(file_or_str_content, arcname=arc_name)
228-
229-
230-
def resolve_rdf_source_and_type(source: Union[os.PathLike, str, dict, raw_nodes.URI]) -> Tuple[dict, str]:
    """Return the RDF data of `source` as a dict together with its resource type.

    A dict source is used as is (a missing ROOT_PATH entry is added in place);
    any other source is loaded through `get_dict_from_yaml_source`.
    """
    if not isinstance(source, dict):
        rdf_data = get_dict_from_yaml_source(source)
    else:
        rdf_data = source
        rdf_data.setdefault(ROOT_PATH, pathlib.Path())

    # todo: remove default 'model' type
    return rdf_data, rdf_data.get("type", "model")
241-
242-
243-
def get_dict_from_yaml_source(source: Union[os.PathLike, str, raw_nodes.URI, dict]) -> dict:
    """Load `source` into a dict and record its root path under ROOT_PATH.

    A dict is returned as is (with ROOT_PATH added if missing). Strings, paths
    and URIs are resolved, downloaded if remote and extracted if zipped before
    the YAML file is loaded.

    Raises:
        TypeError: for unsupported source types.
        ValidationError: if the resolved file has neither a '.yaml' nor a '.yml' suffix.
    """
    if isinstance(source, dict):
        source.setdefault(ROOT_PATH, pathlib.Path())
        return source

    if not isinstance(source, (str, os.PathLike, raw_nodes.URI)):
        raise TypeError(source)

    source = resolve_local_uri(source, pathlib.Path())

    is_remote = isinstance(source, raw_nodes.URI)
    local_source = _download_uri_to_local_path(source) if is_remote else source
    root_path = pathlib.Path() if is_remote else source.parent

    assert isinstance(local_source, pathlib.Path)
    if local_source.suffix == ".zip":
        # continue with the RDF file found inside the extracted package
        local_source = extract_resource_package(local_source)
        root_path = local_source.parent

    suffix = local_source.suffix
    if suffix == ".yml":
        warnings.warn(
            "suffix '.yml' is not recommended and will raise a ValidationError in the future. Use '.yaml' instead "
            "(https://yaml.org/faq.html)"
        )
    elif suffix != ".yaml":
        raise ValidationError(f"invalid suffix {local_source.suffix} for source {source}")

    loaded = yaml.load(local_source)
    assert isinstance(loaded, dict)
    loaded[ROOT_PATH] = root_path
    return loaded

bioimageio/core/resource_io/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
from marshmallow import ValidationError
1515

1616
from bioimageio.spec.shared import fields, raw_nodes
17+
from bioimageio.spec.shared.common import BIOIMAGEIO_CACHE_PATH
1718
from bioimageio.spec.shared.utils import GenericRawNode, GenericRawRD, NodeTransformer, NodeVisitor
1819
from . import nodes
19-
from .common import BIOIMAGEIO_CACHE_PATH
2020

2121
GenericResolvedNode = typing.TypeVar("GenericResolvedNode", bound=nodes.Node)
2222
GenericNode = typing.Union[GenericRawNode, GenericResolvedNode]

tests/test_cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
from bioimageio.core import load_resource_description
77

88

9+
def test_validate_model(unet2d_nuclei_broad_model):
    """`bioimageio validate` exits with return code 0 for the given model fixture."""
    command = ["bioimageio", "validate", unet2d_nuclei_broad_model]
    completed = subprocess.run(command)
    assert completed.returncode == 0
12+
13+
914
def test_cli_test_model(unet2d_nuclei_broad_model):
    """`bioimageio test-model` exits with return code 0 for the given model fixture."""
    command = ["bioimageio", "test-model", unet2d_nuclei_broad_model]
    completed = subprocess.run(command)
    assert completed.returncode == 0

0 commit comments

Comments
 (0)