Skip to content

Commit 2af21ab

Browse files
committed
fix: be more cautious when claiming a backend can open a URL
1 parent eed12c4 commit 2af21ab

File tree

3 files changed

+86
-2
lines changed

3 files changed

+86
-2
lines changed

xarray/backends/netCDF4_.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,12 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):
702702

703703
def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
704704
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
705-
return True
705+
# For remote URIs, check file extension to avoid claiming non-netCDF URLs
706+
# (e.g., remote Zarr stores)
707+
_, ext = os.path.splitext(filename_or_obj.rstrip("/"))
708+
# Accept remote URIs with netCDF extensions or no extension
709+
# (OPeNDAP endpoints often have no extension)
710+
return ext in {".nc", ".nc4", ".cdf", ""}
706711

707712
magic_number = (
708713
bytes(filename_or_obj[:8])

xarray/backends/pydap_.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import os
34
from collections.abc import Iterable
45
from typing import TYPE_CHECKING, Any
56

@@ -209,7 +210,14 @@ class PydapBackendEntrypoint(BackendEntrypoint):
209210
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"
210211

211212
def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
    """Guess whether the pydap backend should claim ``filename_or_obj``.

    Only remote URI strings are ever claimed. Among those, URIs whose
    path ends in an extension that points at a non-OPeNDAP resource
    (a Zarr store or an archive) are rejected so that another backend
    can pick them up.

    Parameters
    ----------
    filename_or_obj
        The object passed to ``open_dataset``.

    Returns
    -------
    bool
        ``True`` if ``filename_or_obj`` is a remote URI string whose
        path does not end in ``.zarr``, ``.zip``, ``.tar`` or ``.gz``
        (compared case-insensitively); ``False`` otherwise.
    """
    if not (isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj)):
        return False

    # Look only at the path part of the URI: a "?query" or "#fragment"
    # suffix would otherwise be glued onto the extension
    # ("store.zarr?token=abc" -> ".zarr?token=abc") and slip past the
    # rejection list, while a dotted query value ("/data?file=x.zip")
    # would be mistaken for a file extension.
    location = filename_or_obj.partition("#")[0].partition("?")[0]

    # A trailing slash (common for directory-like stores) must not hide
    # the extension.
    _, ext = os.path.splitext(location.rstrip("/"))

    # Pydap handles OPeNDAP endpoints, which typically have no extension
    # or .nc/.nc4, so reject only the known non-OPeNDAP extensions.
    # Lower-casing makes ".ZARR" and ".zarr" behave the same.
    return ext.lower() not in {".zarr", ".zip", ".tar", ".gz"}
213221

214222
def open_dataset(
215223
self,

xarray/tests/test_backends.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7252,6 +7252,77 @@ def test_zarr_entrypoint(tmp_path: Path) -> None:
72527252
assert not entrypoint.guess_can_open("something.zarr.txt")
72537253

72547254

7255+
@requires_netCDF4
@requires_pydap
@requires_zarr
def test_remote_url_backend_auto_detection() -> None:
    """
    Check which backend entrypoints claim which kinds of remote URL.

    The netCDF4 and pydap backends used to claim every remote URL, which
    prevented remote Zarr stores from being auto-detected. Each group of
    URLs below is checked against the ``guess_can_open`` method of the
    relevant entrypoints.
    """
    from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint
    from xarray.backends.pydap_ import PydapBackendEntrypoint
    from xarray.backends.zarr import ZarrBackendEntrypoint

    # Bind the three ``guess_can_open`` methods once; every check below
    # goes through these callables.
    netcdf4_claims = NetCDF4BackendEntrypoint().guess_can_open
    pydap_claims = PydapBackendEntrypoint().guess_can_open
    zarr_claims = ZarrBackendEntrypoint().guess_can_open

    # Remote Zarr stores: Zarr only.
    for zarr_url in (
        "https://example.com/store.zarr",
        "http://example.com/data.zarr/",
        "s3://bucket/path/to/data.zarr",
    ):
        url = zarr_url
        assert zarr_claims(url), f"Zarr should claim {url}"
        assert not netcdf4_claims(url), f"NetCDF4 should not claim {url}"
        assert not pydap_claims(url), f"Pydap should not claim {url}"

    # Remote files with a netCDF extension: netCDF4 yes, Zarr no.
    for nc_url in (
        "https://example.com/file.nc",
        "http://example.com/data.nc4",
        "https://example.com/test.cdf",
    ):
        url = nc_url
        assert not zarr_claims(url), f"Zarr should not claim {url}"
        assert netcdf4_claims(url), f"NetCDF4 should claim {url}"

    # Extension-less OPeNDAP endpoints: both netCDF4 and pydap, not Zarr.
    for dap_url in (
        "http://opendap.example.com/data",
        "https://test.opendap.org/dataset",
    ):
        url = dap_url
        assert not zarr_claims(url), f"Zarr should not claim {url}"
        assert netcdf4_claims(url), f"NetCDF4 should claim {url}"
        assert pydap_claims(url), f"Pydap should claim {url}"

    # Archives and other unrelated file types: nobody claims them.
    for archive_url in (
        "https://example.com/data.zip",
        "https://example.com/data.tar.gz",
    ):
        url = archive_url
        assert not zarr_claims(url), f"Zarr should not claim {url}"
        assert not netcdf4_claims(url), f"NetCDF4 should not claim {url}"
        assert not pydap_claims(url), f"Pydap should not claim {url}"
7324+
7325+
72557326
@requires_netCDF4
72567327
@pytest.mark.parametrize("str_type", (str, np.str_))
72577328
def test_write_file_from_np_str(str_type: type[str | np.str_], tmpdir: str) -> None:

0 commit comments

Comments
 (0)