zarr-developers · TomNicholas · May 14, 2024 · Mar 29, 2024 · Apr 30, 2024 · Apr 30, 2024
diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py
@@ -35,7 +35,8 @@ class FileType(AutoName):
     zarr = auto()
 
 def read_kerchunk_references_from_file(
-    filepath: str, filetype: Optional[FileType]
+    filepath: str, filetype: Optional[FileType],
+    reader_options: Optional[dict] = {'storage_options': {'anon': True}}
 ) -> KerchunkStoreRefs:
     """
     Read a single legacy file and return kerchunk references to its contents.
@@ -47,6 +48,9 @@ def read_kerchunk_references_from_file(
     filetype : FileType, default: None
         Type of file to be opened. Used to determine which kerchunk file format backend to use.
         If not provided will attempt to automatically infer the correct filetype from the the filepath's extension.
+    reader_options : dict, default {'storage_options': {'anon': True}}
+        Keyword arguments unpacked into the selected Kerchunk file reader. Each reader accepts a
+        different set of arguments, so the keys must be valid for the reader chosen via ``filetype``.
     """
 
     if filetype is None:
@@ -57,24 +61,24 @@ def read_kerchunk_references_from_file(
 
     if filetype.name.lower() == "netcdf3":
         from kerchunk.netCDF3 import NetCDF3ToZarr
-        refs = NetCDF3ToZarr(filepath, inline_threshold=0).translate()
 
+        refs = NetCDF3ToZarr(filepath, inline_threshold=0, **reader_options).translate()
     elif filetype.name.lower() == "netcdf4":
         from kerchunk.hdf import SingleHdf5ToZarr
 
-        refs = SingleHdf5ToZarr(filepath, inline_threshold=0).translate()
+        refs = SingleHdf5ToZarr(filepath, inline_threshold=0, **reader_options).translate()
     elif filetype.name.lower() == "grib":
         # TODO Grib files should be handled as a DataTree object
         # see https://github.com/TomNicholas/VirtualiZarr/issues/11
         raise NotImplementedError(f"Unsupported file type: {filetype}")
     elif filetype.name.lower() == "tiff":
         from kerchunk.tiff import tiff_to_zarr
 
-        refs = tiff_to_zarr(filepath, inline_threshold=0)
-    elif filetype.name.lower() == "fits":
+        refs = tiff_to_zarr(filepath, inline_threshold=0, **reader_options)
+    elif filetype.name.lower() == "fits":
         from kerchunk.fits import process_file
 
-        refs = process_file(filepath, inline_threshold=0)
+        refs = process_file(filepath, inline_threshold=0, **reader_options)
     else:
         raise NotImplementedError(f"Unsupported file type: {filetype.name}")
 

diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py
@@ -4,6 +4,7 @@
 import xarray as xr
 import xarray.testing as xrt
 from xarray.core.indexes import Index
+import pytest
 
 from virtualizarr import open_virtual_dataset
 from virtualizarr.manifests import ChunkManifest, ManifestArray
@@ -271,6 +272,15 @@ def test_combine_by_coords(self, netcdf4_files):
         assert combined_vds.xindexes["time"].to_pandas_index().is_monotonic_increasing
 
 
+@pytest.mark.xfail(reason="currently should xfail for None filetype and None indexes.", run=False)
+@pytest.mark.parametrize("filetype", ['netcdf4', None], ids=["netcdf4 filetype", "None filetype"])
+@pytest.mark.parametrize("indexes", [None, {}], ids=["None index", "empty dict index"])
+def test_anon_read_s3(filetype, indexes):
+    pytest.importorskip("s3fs")  # inside the test so a missing s3fs skips only this test, not the whole module
+    fpath = 's3://nex-gddp-cmip6/NEX-GDDP-CMIP6/CESM2/historical/r4i1p1f1/pr/pr_day_CESM2_historical_r4i1p1f1_gn_2010.nc'
+    assert open_virtual_dataset(fpath, filetype=filetype, indexes=indexes, reader_options={'storage_options': {'anon': True}})
+
+
 class TestLoadVirtualDataset:
     def test_loadable_variables(self, netcdf4_file):
         vars_to_load = ['air', 'time']

diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py
@@ -25,6 +25,7 @@ def open_virtual_dataset(
     loadable_variables: Optional[Iterable[str]] = None,
     indexes: Optional[Mapping[str, Index]] = None,
     virtual_array_class=ManifestArray,
+    reader_options: Optional[dict] = {'storage_options': {'anon': True}}
 ) -> xr.Dataset:
     """
     Open a file or store as an xarray Dataset wrapping virtualized zarr arrays.
@@ -53,6 +54,9 @@ def open_virtual_dataset(
     virtual_array_class
         Virtual array class to use to represent the references to the chunks in each on-disk array.
         Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that.
+    reader_options : dict, default {'storage_options': {'anon': True}}
+        Keyword arguments forwarded to the Kerchunk file reader used to open the file. Each reader
+        accepts a different set of arguments, so the keys must be valid for the reader selected via ``filetype``.
 
     Returns
     -------
@@ -81,6 +85,7 @@ def open_virtual_dataset(
     vds_refs = kerchunk.read_kerchunk_references_from_file(
         filepath=filepath,
         filetype=filetype,
+        reader_options=reader_options,
     )
     virtual_vars = virtual_vars_from_kerchunk_refs(
         vds_refs,