@@ -7159,7 +7159,10 @@ def test_netcdf4_entrypoint(tmp_path: Path) -> None:
7159
7159
_check_guess_can_open_and_open (entrypoint , path , engine = "netcdf4" , expected = ds )
7160
7160
_check_guess_can_open_and_open (entrypoint , str (path ), engine = "netcdf4" , expected = ds )
7161
7161
7162
- assert entrypoint .guess_can_open ("http://something/remote" )
7162
+ # Remote URLs without extensions are no longer claimed (stricter detection)
7163
+ assert not entrypoint .guess_can_open ("http://something/remote" )
7164
+ # Remote URLs with netCDF extensions are claimed
7165
+ assert entrypoint .guess_can_open ("http://something/remote.nc" )
7163
7166
assert entrypoint .guess_can_open ("something-local.nc" )
7164
7167
assert entrypoint .guess_can_open ("something-local.nc4" )
7165
7168
assert entrypoint .guess_can_open ("something-local.cdf" )
@@ -7202,6 +7205,10 @@ def test_scipy_entrypoint(tmp_path: Path) -> None:
7202
7205
assert entrypoint .guess_can_open ("something-local.nc.gz" )
7203
7206
assert not entrypoint .guess_can_open ("not-found-and-no-extension" )
7204
7207
assert not entrypoint .guess_can_open (b"not-a-netcdf-file" )
7208
+ # Should not claim .gz files that aren't netCDF
7209
+ assert not entrypoint .guess_can_open ("something.zarr.gz" )
7210
+ assert not entrypoint .guess_can_open ("something.tar.gz" )
7211
+ assert not entrypoint .guess_can_open ("something.txt.gz" )
7205
7212
7206
7213
7207
7214
@requires_h5netcdf
@@ -7252,75 +7259,73 @@ def test_zarr_entrypoint(tmp_path: Path) -> None:
7252
7259
assert not entrypoint .guess_can_open ("something.zarr.txt" )
7253
7260
7254
7261
7262
@requires_h5netcdf
@requires_netCDF4
@requires_pydap
@requires_zarr
def test_remote_url_backend_auto_detection() -> None:
    """
    Test that remote URLs are correctly selected by the backend resolution system.

    This tests the fix for an issue where the netCDF4, h5netcdf, and pydap
    backends were claiming ALL remote URLs, preventing remote Zarr stores from
    being auto-detected.

    The test goes through ``guess_engine`` rather than the individual
    entrypoints' ``guess_can_open`` methods, so it checks which backend is
    finally selected for a URL, not merely which backends would claim it.

    See: https://github.com/pydata/xarray/issues/10801
    """
    from xarray.backends.plugins import guess_engine

    # Test cases: (url, expected_backend)
    test_cases = [
        # Remote Zarr URLs
        ("https://example.com/store.zarr", "zarr"),
        ("http://example.com/data.zarr/", "zarr"),
        ("s3://bucket/path/to/data.zarr", "zarr"),
        # Remote netCDF URLs (non-DAP) - h5netcdf wins (first in order)
        ("https://example.com/file.nc", "h5netcdf"),
        ("http://example.com/data.nc4", "h5netcdf"),
        ("https://example.com/test.cdf", "h5netcdf"),
        ("https://example.com/data.nc?var=temperature&time=0", "h5netcdf"),
        # DAP URLs with query parameters - h5netcdf wins (has .nc4 ext, first in order)
        (
            "http://test.opendap.org/opendap/dap4/StaggeredGrid.nc4?dap4.ce=/time[0:1:0]",
            "h5netcdf",
        ),
        # DAP URLs without extensions - pydap wins
        ("dap2://opendap.earthdata.nasa.gov/collections/dataset", "pydap"),
        ("dap4://opendap.earthdata.nasa.gov/collections/dataset", "pydap"),
        ("DAP2://example.com/dataset", "pydap"),  # uppercase scheme
        ("DAP4://example.com/dataset", "pydap"),  # uppercase scheme
        ("https://example.com/services/DAP2/dataset", "pydap"),  # uppercase in path
        # DAP URLs with .nc extensions - h5netcdf wins (first in order)
        ("http://test.opendap.org/opendap/dap4/StaggeredGrid.nc4", "h5netcdf"),
        ("https://example.com/DAP4/data.nc", "h5netcdf"),
        ("http://example.com/data/Dap4/file.nc", "h5netcdf"),
        ("s3://bucket/path/to/data.nc", "h5netcdf"),
    ]

    for url, expected_backend in test_cases:
        engine = guess_engine(url)
        assert engine == expected_backend, (
            f"URL {url!r} should select {expected_backend!r} but got {engine!r}"
        )

    # URLs that should raise ValueError (no backend can open them)
    invalid_urls = [
        "http://test.opendap.org/opendap/data/nc/coads_climatology.nc.dap",  # .dap suffix
        "https://example.com/data.dap",  # .dap suffix
        "http://opendap.example.com/data",  # no extension, no DAP indicators
        "https://test.opendap.org/dataset",  # no extension, no DAP indicators
    ]

    for url in invalid_urls:
        # Guard only the call that is expected to raise; the failure report
        # lives in ``else`` so it can never be confused with the guarded code.
        try:
            engine = guess_engine(url)
        except ValueError:
            pass  # Expected: no backend claims this URL
        else:
            raise AssertionError(
                f"URL {url!r} should not be claimed by any backend, but {engine!r} claimed it"
            )
7324
7329
7325
7330
7326
7331
@requires_netCDF4
0 commit comments