Skip to content

Recipes that retrieve both netCDF3 and netCDF4 files aren't supported on beam-refactor branch #472

@derekocallaghan

Description

@derekocallaghan

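I'm porting the EOOffshore ASCAT recipes to run on the beam-refactor branch. This will involve the retrieval of netCDF3 (older Metop-A/B files) and netCDF4 (Metop-C) files. The previous recipe versions achieved this by specifying the `netcdf4` xarray engine, which can read both formats, via the recipe's xarray open kwargs (`{"engine": "netcdf4"}`).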

This isn't supported by the beam-refactor branch for the following reasons:

  • Unlike the previous/production xarray_zarr.open_input(), which permitted file types that were not in xarray_zarr.OPENER_MAP as well as engines not listed in xarray_zarr.OPENER_MAP (arguably this was a bug), openers._set_engine() will only use engines listed in openers.OPENER_MAP.

if file_type in OPENER_MAP:
if "engine" in kw:
engine_message_base = (
"pangeo-forge-recipes will automatically set the xarray backend for "
f"files of type '{file_type.value}' to '{OPENER_MAP[file_type]}', "
)
warn_matching_msg = engine_message_base + (

def _set_engine(file_type, xr_open_kwargs):
kw = xr_open_kwargs.copy()
if "engine" in kw:
engine_message_base = (
"pangeo-forge-recipes will automatically set the xarray backend for "

elif isinstance(url_or_file_obj, io.IOBase):
# required to make mypy happy
# LocalFileOpener is a subclass of io.IOBase
pass

class NetCDF4DataStore(WritableCFDataStore):
    """Store for reading and writing data via the Python-NetCDF4 library.
    This store supports NetCDF3, NetCDF4 and OpenDAP datasets.
    """

...

    @classmethod
    def open(
        cls,
        filename,
        mode="r",
        format="NETCDF4",
        group=None,
        clobber=True,
        diskless=False,
        persist=False,
        lock=None,
        lock_maker=None,
        autoclose=False,
    ):
        import netCDF4

        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)

        if not isinstance(filename, str):
            raise ValueError(
                "can only read bytes or file-like objects "
                "with engine='scipy' or 'h5netcdf'"
            )

I've been using the following workaround locally, which seems to enable support similar to the xarray_zarr implementation:

diff --git a/pangeo_forge_recipes/openers.py b/pangeo_forge_recipes/openers.py
index 5b0a874..82110f5 100644
--- a/pangeo_forge_recipes/openers.py
+++ b/pangeo_forge_recipes/openers.py
@@ -25,7 +25,6 @@ def open_url(
     :param secrets: If provided these secrets will be injected into the URL as a query string.
     :param open_kwargs: Extra arguments passed to fsspec.open.
     """
-
     kw = open_kwargs or {}
     if cache is not None:
         # this has side effects
@@ -45,35 +44,36 @@ OPENER_MAP = {
 
 def _set_engine(file_type, xr_open_kwargs):
     kw = xr_open_kwargs.copy()
-    if "engine" in kw:
-        engine_message_base = (
-            "pangeo-forge-recipes will automatically set the xarray backend for "
-            f"files of type '{file_type.value}' to '{OPENER_MAP[file_type]}', "
-        )
-        warn_matching_msg = engine_message_base + (
-            "which is the same value you have passed via `xarray_open_kwargs`. "
-            f"If this input file is actually of type '{file_type.value}', you can "
-            f"remove `{{'engine': '{kw['engine']}'}}` from `xarray_open_kwargs`. "
-        )
-        error_mismatched_msg = engine_message_base + (
-            f"which is different from the value you have passed via "
-            "`xarray_open_kwargs`. If this input file is actually of type "
-            f"'{file_type.value}', please remove `{{'engine': '{kw['engine']}'}}` "
-            "from `xarray_open_kwargs`. "
-        )
-        engine_message_tail = (
-            f"If this input file is not of type '{file_type.value}', please update"
-            " this recipe by passing a different value to `FilePattern.file_type`."
-        )
-        warn_matching_msg += engine_message_tail
-        error_mismatched_msg += engine_message_tail
-
-        if kw["engine"] == OPENER_MAP[file_type]["engine"]:
-            warnings.warn(warn_matching_msg)
-        elif kw["engine"] != OPENER_MAP[file_type]["engine"]:
-            raise ValueError(error_mismatched_msg)
-    else:
-        kw.update(OPENER_MAP[file_type])
+    if file_type in OPENER_MAP:
+        if "engine" in kw:
+            engine_message_base = (
+                "pangeo-forge-recipes will automatically set the xarray backend for "
+                f"files of type '{file_type.value}' to '{OPENER_MAP[file_type]}', "
+            )
+            warn_matching_msg = engine_message_base + (
+                "which is the same value you have passed via `xarray_open_kwargs`. "
+                f"If this input file is actually of type '{file_type.value}', you can "
+                f"remove `{{'engine': '{kw['engine']}'}}` from `xarray_open_kwargs`. "
+            )
+            error_mismatched_msg = engine_message_base + (
+                f"which is different from the value you have passed via "
+                "`xarray_open_kwargs`. If this input file is actually of type "
+                f"'{file_type.value}', please remove `{{'engine': '{kw['engine']}'}}` "
+                "from `xarray_open_kwargs`. "
+            )
+            engine_message_tail = (
+                f"If this input file is not of type '{file_type.value}', please update"
+                " this recipe by passing a different value to `FilePattern.file_type`."
+            )
+            warn_matching_msg += engine_message_tail
+            error_mismatched_msg += engine_message_tail
+
+            if kw["engine"] == OPENER_MAP[file_type]["engine"]:
+                warnings.warn(warn_matching_msg)
+            elif kw["engine"] != OPENER_MAP[file_type]["engine"]:
+                raise ValueError(error_mismatched_msg)
+        else:
+            kw.update(OPENER_MAP[file_type])
     return kw
 
 
@@ -121,7 +121,10 @@ def open_with_xarray(
     elif isinstance(url_or_file_obj, io.IOBase):
         # required to make mypy happy
         # LocalFileOpener is a subclass of io.IOBase
-        pass
+
+        # If available, use the path to ensure multiple backends work
+        if hasattr(url_or_file_obj, "path"):
+            url_or_file_obj = url_or_file_obj.path
     elif hasattr(url_or_file_obj, "open"):
         # work around fsspec inconsistencies
         url_or_file_obj = url_or_file_obj.open()

The recipes specify the following:

pattern = pattern_from_file_sequence(
            ...
            # Must be set to unknown, as default type of 'netcdf4' will prevent xarray engine from being specified
            file_type=FileType.unknown,
        )

...

OpenWithXarray(file_type=pattern.file_type, xarray_open_kwargs={"engine": "netcdf4"})

It might be easiest if the netcdf4 engine were supported in OPENER_MAP, but I guess there was a reason for excluding it?

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    Status

    Done

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions