Skip to content

Commit 9a6fe6f

Browse files
authored
Remove path suffix check (#659)
* remove path suffix check * remove test * remove failing test * add test for #593 * release note
1 parent ea426d3 commit 9a6fe6f

File tree

3 files changed

+12
-35
lines changed

3 files changed

+12
-35
lines changed

docs/releases.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
By [Tom Nicholas](https://github.com/TomNicholas).
5050
- Fixed bug causing coordinates to be demoted to data variables when writing to Icechunk ([#574](https://github.com/zarr-developers/VirtualiZarr/issues/574), [#588](https://github.com/zarr-developers/VirtualiZarr/pull/588))
5151
By [Tom Nicholas](https://github.com/TomNicholas).
52+
- Removed checks forbidding paths in virtual references without file suffixes ([#659](https://github.com/zarr-developers/VirtualiZarr/pull/659))
53+
By [Tom Nicholas](https://github.com/TomNicholas).
5254
- Fixed bug when indexing a scalar ManifestArray with an ellipsis([#596](https://github.com/zarr-developers/VirtualiZarr/issues/596), [#641](https://github.com/zarr-developers/VirtualiZarr/pull/641))
5355
By [Max Jones](https://github.com/maxrjones) and [Tom Nicholas](https://github.com/TomNicholas).
5456

virtualizarr/manifests/manifest.py

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from collections.abc import ItemsView, Iterable, Iterator, KeysView, ValuesView
33
from pathlib import PosixPath
44
from typing import Any, Callable, NewType, Tuple, TypedDict, cast
5-
from urllib.parse import urlparse, urlunparse
65

76
import numpy as np
87

@@ -24,6 +23,8 @@
2423
"cos://",
2524
"minio://",
2625
"file:///",
26+
"http://",
27+
"https://",
2728
}
2829

2930

@@ -68,39 +69,13 @@ def validate_and_normalize_path_to_uri(path: str, fs_root: str | None = None) ->
6869
if path == "":
6970
# (empty paths are allowed through as they represent missing chunks)
7071
return path
71-
72-
# TODO ideally we would just use cloudpathlib.AnyPath to handle all types of paths but that would require extra dependencies, see https://github.com/drivendataorg/cloudpathlib/issues/489#issuecomment-2504725280
73-
74-
if path.startswith("http://") or path.startswith("https://"):
75-
# hopefully would raise if given a malformed URL
76-
components = urlparse(path)
77-
78-
if not PosixPath(components.path).suffix:
79-
raise ValueError(
80-
f"entries in the manifest must be paths to files, but this path has no file suffix: {path}"
81-
)
82-
83-
return urlunparse(components)
84-
8572
elif any(path.startswith(prefix) for prefix in VALID_URI_PREFIXES):
86-
# Question: This feels fragile, is there a better way to ID a Zarr
87-
if not PosixPath(path).suffix and "zarr" not in path:
88-
raise ValueError(
89-
f"entries in the manifest must be paths to files, but this path has no file suffix: {path}"
90-
)
91-
9273
return path # path is already in URI form
93-
9474
else:
9575
# must be a posix filesystem path (absolute or relative)
9676
# using PosixPath here ensures a clear error would be thrown on windows (whose paths and platform are not officially supported)
9777
_path = PosixPath(path)
9878

99-
if not _path.suffix and "zarr" not in path:
100-
raise ValueError(
101-
f"entries in the manifest must be paths to files, but this path has no file suffix: {path}"
102-
)
103-
10479
# only posix paths can possibly not be absolute
10580
if not _path.is_absolute():
10681
if fs_root is None:

virtualizarr/tests/test_manifests/test_manifest.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@ def test_allow_http_urls(self, url):
3131
chunkentry = ChunkEntry.with_validation(path=url, offset=100, length=100)
3232
assert chunkentry["path"] == url
3333

34-
@pytest.mark.parametrize(
35-
"path", ["/directory/file", "s3://bucket/file", "https://site.com/file"]
36-
)
37-
def test_disallow_paths_without_file_suffixes(self, path):
38-
with pytest.raises(ValueError, match="this path has no file suffix"):
39-
ChunkEntry.with_validation(path=path, offset=100, length=100)
40-
4134
@pytest.mark.parametrize(
4235
"path",
4336
[
@@ -47,13 +40,20 @@ def test_disallow_paths_without_file_suffixes(self, path):
4740
reason="cloudpathlib should ideally do stricter validation - see https://github.com/drivendataorg/cloudpathlib/issues/489"
4841
),
4942
),
50-
"https://site.com/###/foo.nc",
5143
],
5244
)
5345
def test_catch_malformed_path(self, path):
5446
with pytest.raises(ValueError):
5547
ChunkEntry.with_validation(path=path, offset=100, length=100)
5648

49+
def test_allow_path_containing_colon(self):
50+
# regression test for issue #593
51+
ChunkEntry.with_validation(
52+
path="file:///folder/wrfout_d01_2024-01-02_01:00:00.nc",
53+
offset=100,
54+
length=100,
55+
)
56+
5757

5858
class TestConvertingRelativePathsUsingFSRoot:
5959
def test_fs_root_must_be_absolute(self):

0 commit comments

Comments
 (0)