-
Notifications
You must be signed in to change notification settings - Fork 49
Adding reader_options
kwargs to open_virtual_dataset.
#67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
4c6cb63
adf311a
ba5ac6d
ea30914
448800b
8c5dff7
6cd77ce
f0daafe
ed3d0f4
beec724
e669841
09f89a6
e4db860
ba8b1e3
b12d32c
f9478b9
6958b59
aefa22d
464ffd3
d108978
5cc5ecd
3509a1f
80cf22b
a3fc72e
0235f51
1e9e2fe
55031f9
6a3d7be
5aec9db
83b3c4b
a143cf4
9d124ef
3a29b41
b9c056a
13fc295
4f766d9
e6f047f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,4 +13,4 @@ dependencies: | |
- "sphinx_design" | ||
- "sphinx_togglebutton" | ||
- "sphinx-autodoc-typehints" | ||
- -e .. | ||
- -e "..[test]" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from typing import Optional, Union | ||
|
||
# TODO: fsspec and s3fs are imported here only for type annotations. Is there a better way in case these are optional deps? | ||
from s3fs.core import S3File | ||
from fsspec.implementations.local import LocalFileOpener | ||
norlandrhagen marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
|
||
def _fsspec_openfile_from_filepath(
    *,
    filepath: str,
    reader_options: Optional[dict] = None,
) -> Union[S3File, LocalFileOpener]:
    """
    Open a local or S3 file with fsspec and return the opened file object.

    Parameters
    ----------
    filepath
        Path or URL of the file to open. The protocol is detected with
        ``upath.UPath``; only local paths (empty protocol) and ``s3`` are
        handled.
    reader_options
        Dict of reader options. Only its ``'storage_options'`` entry is used
        here, and only for ``s3`` paths, where it is forwarded to
        ``fsspec.filesystem``. Keys missing from ``'storage_options'`` are
        filled in from the anonymous-access defaults
        ``{'key': '', 'secret': '', 'anon': True}``. ``None`` (the default),
        an empty dict, or a dict without ``'storage_options'`` all result in
        those defaults being used unchanged.

    Returns
    -------
    Union[S3File, LocalFileOpener]
        The opened file object returned by fsspec.

    Raises
    ------
    NotImplementedError
        If the protocol of ``filepath`` is neither local nor ``s3``.
    """
    # Imported lazily so that importing this module does not itself require
    # these packages at call sites that never open a file.
    import fsspec
    from upath import UPath

    protocol = UPath(filepath).protocol

    # UPath reports an empty protocol string for plain local paths.
    if protocol == "":
        return fsspec.open(filepath, "rb").open()

    if protocol == "s3":
        s3_anon_defaults = {"key": "", "secret": "", "anon": True}

        # ``or {}`` covers reader_options being None/empty as well as a
        # missing (or None) 'storage_options' entry, which would otherwise
        # make the dict merge below raise TypeError.
        user_storage_options = (reader_options or {}).get("storage_options") or {}

        # Dict merge: user-supplied keys win, defaults fill in the rest.
        # A new dict is built, so neither input is mutated.
        storage_options = s3_anon_defaults | user_storage_options

        return fsspec.filesystem(protocol, **storage_options).open(filepath)

    raise NotImplementedError("Only local and s3 file protocols are currently supported")
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
from xarray.core.variable import IndexVariable | ||
|
||
import virtualizarr.kerchunk as kerchunk | ||
from virtualizarr.utils import _fsspec_openfile_from_filepath | ||
from virtualizarr.kerchunk import KerchunkStoreRefs, FileType | ||
from virtualizarr.manifests import ChunkManifest, ManifestArray | ||
from virtualizarr.zarr import dataset_to_zarr, attrs_from_zarr_group_json, metadata_from_zarr_json | ||
|
@@ -27,6 +28,7 @@ def open_virtual_dataset( | |
loadable_variables: Optional[Iterable[str]] = None, | ||
indexes: Optional[Mapping[str, Index]] = None, | ||
virtual_array_class=ManifestArray, | ||
reader_options: Optional[dict] = {'storage_options':{'key':'', 'secret':'', 'anon':True}}, | ||
|
||
) -> xr.Dataset: | ||
""" | ||
Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. | ||
|
@@ -55,13 +57,17 @@ def open_virtual_dataset( | |
virtual_array_class | ||
Virtual array class to use to represent the references to the chunks in each on-disk array. | ||
Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that. | ||
reader_options: dict, default {'storage_options':{'key':'', 'secret':'', 'anon':True}} | ||
Dict passed into Kerchunk file readers. Note: Each Kerchunk file reader has distinct arguments, | ||
so ensure reader_options match selected Kerchunk reader arguments. | ||
|
||
Returns | ||
------- | ||
vds | ||
An xarray Dataset containing instances of virtual_array_cls for each variable, or normal lazily indexed arrays for each variable in loadable_variables. | ||
""" | ||
|
||
|
||
if drop_variables is None: | ||
drop_variables = [] | ||
elif isinstance(drop_variables, str): | ||
|
@@ -103,7 +109,9 @@ def open_virtual_dataset( | |
# TODO we are reading a bunch of stuff we know we won't need here, e.g. all of the data variables... | ||
# TODO it would also be nice if we could somehow consolidate this with the reading of the kerchunk references | ||
# TODO really we probably want a dedicated xarray backend that iterates over all variables only once | ||
ds = xr.open_dataset(filepath, drop_variables=drop_variables) | ||
fpath = _fsspec_openfile_from_filepath(filepath=filepath,reader_options=reader_options) | ||
|
||
ds = xr.open_dataset(fpath, drop_variables=drop_variables) | ||
|
||
if indexes is None: | ||
# add default indexes by reading data from file | ||
|
@@ -139,6 +147,7 @@ def open_virtual_dataset( | |
return vds | ||
|
||
|
||
|
||
def open_virtual_dataset_from_v3_store( | ||
storepath: str, | ||
drop_variables: List[str], | ||
|
Uh oh!
There was an error while loading. Please reload this page.