Skip to content
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ Deprecations

Bug fixes
~~~~~~~~~

- ``netcdf`` and ``pydap`` engines no longer incorrectly claim to read all remote URLs, which prevented
  the ``zarr`` backend from reading remote zarr stores without an explicit ``engine=`` argument
  (:pull:`10804`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
7 changes: 6 additions & 1 deletion xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,12 @@ class NetCDF4BackendEntrypoint(BackendEntrypoint):

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
if isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj):
return True
# For remote URIs, check file extension to avoid claiming non-netCDF URLs
# (e.g., remote Zarr stores)
_, ext = os.path.splitext(filename_or_obj.rstrip("/"))
# Accept remote URIs with netCDF extensions or no extension
# (OPeNDAP endpoints often have no extension)
return ext in {".nc", ".nc4", ".cdf", ""}

magic_number = (
bytes(filename_or_obj[:8])
Expand Down
10 changes: 9 additions & 1 deletion xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -209,7 +210,14 @@ class PydapBackendEntrypoint(BackendEntrypoint):
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PydapBackendEntrypoint.html"

def guess_can_open(self, filename_or_obj: T_PathFileOrDataStore) -> bool:
    """Guess whether the pydap backend should try to open ``filename_or_obj``.

    Only remote URI strings are candidates. A remote URI is rejected when its
    final path component carries an extension that clearly belongs to another
    format (for example a remote Zarr store), so that other backends get a
    chance to claim it during engine auto-detection.

    Parameters
    ----------
    filename_or_obj
        The object passed by the user to ``open_dataset``.

    Returns
    -------
    bool
        ``True`` if ``filename_or_obj`` is a remote URI string whose extension
        is not in the deny-list below, ``False`` otherwise.
    """
    if not (isinstance(filename_or_obj, str) and is_remote_uri(filename_or_obj)):
        return False

    # Strip a trailing "/" first so "http://host/data.zarr/" still yields ".zarr".
    _, ext = os.path.splitext(filename_or_obj.rstrip("/"))
    # There is no standard extension for OPeNDAP URLs (they commonly have none,
    # or end in .nc/.nc4), so use a deny-list of known non-OPeNDAP extensions
    # rather than an allow-list.
    return ext not in {".zarr", ".zip", ".tar", ".gz"}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure on this. We could go further and require "dap" to be in the URL

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there's a standard extension for OpenDAP URLs. @Mikejmnez do you know?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked with a co-worker on slack. He said:

There's no standard extension for DAP URLs. Explicitly excluding .zarr seems good enough for this disambiguation.

Copy link
Contributor

@Mikejmnez Mikejmnez Oct 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. Yes, there is no standard extension for opendap urls. OPeNDAP servers produce urls with the filename at the end, but for example NASA does something completely different. Excluding .zarr should be good.

What I am trying to push for is a protocol-ization of OPeNDAP via the URL scheme: "dap2://<file_url>" vs "dap4://<file_url>". I already added this to the documentation a while back (dap2vdap4). Right now, if an OPeNDAP URL begins with http, it is assumed to be dap2. This is handled entirely on the client side and is not a server thing. But pydap and python-netcdf4 support this, and some NASA subsetting tools do this as well. Perhaps this may help separate OPeNDAP URLs from non-OPeNDAP URLs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, actually, Thredds (TDS) does have this "standard" way to specify the protocol that may help to discern between opendap url vs non-opendap url: a TDS dap2 url will have a dodsC in its urls. A TDS dap4 url will have a dap4 in its url. (see here). However, an organization running an opendap server may decide how their own urls are exposed.


def open_dataset(
self,
Expand Down
71 changes: 71 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -7252,6 +7252,77 @@ def test_zarr_entrypoint(tmp_path: Path) -> None:
assert not entrypoint.guess_can_open("something.zarr.txt")


@requires_netCDF4
@requires_pydap
@requires_zarr
def test_remote_url_backend_auto_detection() -> None:
    """
    Test that remote URLs are correctly claimed by the appropriate backends.

    This tests the fix for an issue where the netCDF4 and pydap backends were
    claiming ALL remote URLs, preventing remote Zarr stores from being
    auto-detected.

    See: https://github.com/pydata/xarray/pull/10804
    """
    from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint
    from xarray.backends.pydap_ import PydapBackendEntrypoint
    from xarray.backends.zarr import ZarrBackendEntrypoint

    netcdf4_entrypoint = NetCDF4BackendEntrypoint()
    pydap_entrypoint = PydapBackendEntrypoint()
    zarr_entrypoint = ZarrBackendEntrypoint()

    # Remote Zarr URLs should be claimed by Zarr backend, not netCDF4/pydap
    remote_zarr_urls = [
        "https://example.com/store.zarr",
        "http://example.com/data.zarr/",
        "s3://bucket/path/to/data.zarr",
    ]

    for url in remote_zarr_urls:
        assert zarr_entrypoint.guess_can_open(url), f"Zarr should claim {url}"
        assert not netcdf4_entrypoint.guess_can_open(url), (
            f"NetCDF4 should not claim {url}"
        )
        assert not pydap_entrypoint.guess_can_open(url), f"Pydap should not claim {url}"

    # Remote netCDF URLs with extensions should be claimed by netCDF4, not Zarr
    remote_netcdf_urls_with_ext = [
        "https://example.com/file.nc",
        "http://example.com/data.nc4",
        "https://example.com/test.cdf",
    ]

    for url in remote_netcdf_urls_with_ext:
        assert not zarr_entrypoint.guess_can_open(url), f"Zarr should not claim {url}"
        assert netcdf4_entrypoint.guess_can_open(url), f"NetCDF4 should claim {url}"

    # OPeNDAP endpoints (no extension) should be claimed by both netCDF4 and pydap
    opendap_urls = [
        "http://opendap.example.com/data",
        "https://test.opendap.org/dataset",
    ]

    for url in opendap_urls:
        assert not zarr_entrypoint.guess_can_open(url), f"Zarr should not claim {url}"
        assert netcdf4_entrypoint.guess_can_open(url), f"NetCDF4 should claim {url}"
        assert pydap_entrypoint.guess_can_open(url), f"Pydap should claim {url}"

    # Other file types (archives) should not be claimed by any of the three backends
    other_urls = [
        "https://example.com/data.zip",
        "https://example.com/data.tar.gz",
    ]

    for url in other_urls:
        assert not zarr_entrypoint.guess_can_open(url), f"Zarr should not claim {url}"
        assert not netcdf4_entrypoint.guess_can_open(url), (
            f"NetCDF4 should not claim {url}"
        )
        assert not pydap_entrypoint.guess_can_open(url), f"Pydap should not claim {url}"


@requires_netCDF4
@pytest.mark.parametrize("str_type", (str, np.str_))
def test_write_file_from_np_str(str_type: type[str | np.str_], tmpdir: str) -> None:
Expand Down
Loading