Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
shell: bash -l {0}
run: |
export LD_PRELOAD=${{ env.LD_PRELOAD }};
pytest -n auto --cov=satpy satpy/tests --cov-report=xml --cov-report=
pytest --forked -n auto --cov=satpy satpy/tests --cov-report=xml --cov-report=

- name: Upload unittest coverage to Codecov
uses: codecov/codecov-action@v5
Expand Down
15 changes: 8 additions & 7 deletions continuous_integration/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ dependencies:
- ephem
- bokeh
- pytest-xdist
- pytest-forked
- pip:
- pytest-lazy-fixtures
- trollsift
- trollimage>=1.24
- pyspectral
- pyorbital
- pyPublicDecompWT
- pygac
- pytest-lazy-fixtures
- trollsift
- trollimage>=1.24
- pyspectral
- pyorbital
- pyPublicDecompWT
- pygac
5 changes: 5 additions & 0 deletions satpy/readers/core/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def load_readers(filenames=None, reader=None, reader_kwargs=None):

loadables = reader_instance.select_files_from_pathnames(readers_files)
if loadables:
# WARN: This is very confusing, but it seems to work: The reader_kwargs, when passed without a reader key,
# are put into a dictionary where keys are individual letters of the reader name, and the value is the
# reader kwargs; however, notice the `reader[idx]` part in the call here, as idx is 0 and thus the first
# letter of the reader is used to index `reader_kwargs_without_filter`, thus retrieving the correct
# reader_kwargs.
Comment on lines +65 to +69
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So basically this logic is incorrect and works by luck/chance and needs to be fixed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if it was the intended behaviour or not... It's working in the end, so maybe it is? But yeah, it would definitely benefit from a dedicated test and then a refactor...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what did you do to run into the issue? You called it from the Scene or noticed it within a test you already had?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, I was debugging why my "engine" keyword arg was not being taken into account. It was another issue, but I spent a while here trying to understand what was happening…

reader_instance.create_storage_items(
loadables,
fh_kwargs=reader_kwargs_without_filter[None if reader is None else reader[idx]])
Expand Down
14 changes: 5 additions & 9 deletions satpy/readers/core/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from contextlib import suppress

import dask.array as da
import netCDF4
import numpy as np
import xarray as xr

Expand Down Expand Up @@ -121,6 +120,7 @@ def _set_xarray_kwargs(self, xarray_kwargs, auto_maskandscale):
self._xarray_kwargs = xarray_kwargs or {}
self._xarray_kwargs.setdefault("chunks", CHUNK_SIZE)
self._xarray_kwargs.setdefault("mask_and_scale", auto_maskandscale)
self._xarray_kwargs["engine"] = self.accessor.engine

def collect_metadata(self, name, obj):
"""Collect all file variables and attributes for the provided file object.
Expand Down Expand Up @@ -332,14 +332,7 @@ def get_and_cache_npxr(self, var_name):
val = v
else:
try:
val = v[:]
val = xr.DataArray(val, dims=v.dimensions,
attrs=self.accessor.get_object_attrs(v),
name=v.name)
except IndexError:
# Handle scalars
val = v.__array__().item()
val = xr.DataArray(val, dims=(), attrs={}, name=var_name)
val = get_data_as_xarray(v)
except AttributeError:
# Handle strings
val = v
Expand Down Expand Up @@ -412,16 +405,19 @@ class NetCDF4Accessor:

def create_file_handle(self, filename):
"""Create a file handle."""
import netCDF4
return netCDF4.Dataset(filename, "r")

@staticmethod
def is_variable(obj):
"""Check if obj is a variable."""
import netCDF4
return isinstance(obj, netCDF4.Variable)

@staticmethod
def is_group(obj):
"""Check if obj is a group."""
import netCDF4
return isinstance(obj, netCDF4.Group)

@staticmethod
Expand Down
38 changes: 30 additions & 8 deletions satpy/tests/reader_tests/test_netcdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"""Module for testing the satpy.readers.core.netcdf module."""

import os
from contextlib import closing

import numpy as np
import pytest
Expand Down Expand Up @@ -68,22 +69,30 @@ def get_test_content(self, filename, filename_info, filetype_info):
@pytest.fixture
def netcdf_file(tmp_path):
"""Create a test NetCDF4 file."""
from netCDF4 import Dataset
filename = tmp_path / "test.nc"
with Dataset(filename, "w") as nc:
from multiprocessing import Process
p = Process(target=_create_netcdf_file, args=(filename, ))
p.start()
p.join()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this complication, does it help us in some way?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, see the text I wrote in the PR note. Basically, it’s because in some environments (like mine), writing with netcdf4 and reading with h5netcdf from the same process crashes, making all the h5netcdf tests fail in my case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok. I didn't see the connection between the comment and the fixture.

return filename


def _create_netcdf_file(filename):
from netCDF4 import Dataset
with closing(Dataset(filename, "w")) as nc:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is closing needed? Isn't it done automatically by Dataset?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked at the documentation, and there is no mention at all of the context manager behaviour, hence the change.
But I have now looked at the code, and you're right, it is done automatically:
https://github.com/Unidata/netcdf4-python/blob/master/src/netCDF4/_netCDF4.pyx#L2547-L2551

Fixing

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed in 99ff436

# Create dimensions
nc.createDimension("rows", 10)
nc.createDimension("cols", 100)

# Create Group
g1 = nc.createGroup("test_group")

# Add datasets
ds1_f = g1.createVariable("ds1_f", np.float32,
dimensions=("rows", "cols"))
ds1_f[:] = np.arange(10. * 100).reshape((10, 100))
ds1_i = g1.createVariable("ds1_i", np.int32,
dimensions=("rows", "cols"))
ds1_f.set_auto_scale(True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line uses ds1_f but it is below ds1_i (ds1_f is used above that). What is the intended outcome? And doesn't this only matter if scale_factor and add_offset are defined? I don't see them for any variables here.

Copy link
Member Author

@mraspaud mraspaud Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, this is an oversight on my part, I should have put them in the correct order. The set_auto_scale I added because it seemed to be needed to make the existing tests pass after converting to a file-based fixture (edit: it already was a file-based fixture).

ds1_i[:] = np.arange(10 * 100).reshape((10, 100))
ds2_f = nc.createVariable("ds2_f", np.float32,
dimensions=("rows", "cols"))
Expand All @@ -95,7 +104,7 @@ def netcdf_file(tmp_path):
dimensions=("rows",))
ds2_s[:] = np.arange(10)
ds2_sc = nc.createVariable("ds2_sc", np.int8, dimensions=())
ds2_sc[:] = 42
ds2_sc[:] = np.int8(42)

# Add attributes
nc.test_attr_str = "test_string"
Expand Down Expand Up @@ -230,21 +239,34 @@ def test_filenotfound(self):
with pytest.raises(IOError, match=".*(No such file or directory|Unknown file format).*"):
NetCDF4FileHandler("/thisfiledoesnotexist.nc", {}, {})

def test_get_and_cache_npxr_is_xr(self, netcdf_file):
@pytest.mark.parametrize("engine", ["netcdf4", "h5netcdf"])
def test_get_and_cache_npxr_is_xr(self, netcdf_file, engine):
"""Test that get_and_cache_npxr() returns xr.DataArray."""
import xarray as xr

from satpy.readers.core.netcdf import NetCDF4FileHandler
file_handler = NetCDF4FileHandler(netcdf_file, {}, {}, cache_handle=True)
file_handler = NetCDF4FileHandler(netcdf_file, {}, {}, cache_handle=True, engine=engine)

data = file_handler.get_and_cache_npxr("test_group/ds1_f")
assert isinstance(data, xr.DataArray)

def test_get_and_cache_npxr_data_is_cached(self, netcdf_file):
@pytest.mark.parametrize("engine", ["netcdf4", "h5netcdf"])
def test_get_and_cache_npxr_for_scalar(self, netcdf_file, engine):
    """Test that get_and_cache_npxr() handles scalar variables.

    The scalar variable ``ds2_sc`` (stored as int8) is read through the
    requested xarray backend engine and its value must survive intact.
    """
    from satpy.readers.core.netcdf import NetCDF4FileHandler
    file_handler = NetCDF4FileHandler(netcdf_file, {}, {}, cache_handle=True, engine=engine)

    data = file_handler.get_and_cache_npxr("ds2_sc")
    # WARN: h5netcdf returns an int64!
    assert data.dtype in [np.int8, np.int64], "Scalar should be of type int8"
    assert data == 42

@pytest.mark.parametrize("engine", ["netcdf4", "h5netcdf"])
def test_get_and_cache_npxr_data_is_cached(self, netcdf_file, engine):
"""Test that the data are cached when get_and_cache_npxr() is called."""
from satpy.readers.core.netcdf import NetCDF4FileHandler

file_handler = NetCDF4FileHandler(netcdf_file, {}, {}, cache_handle=True)
file_handler = NetCDF4FileHandler(netcdf_file, {}, {}, cache_handle=True, engine=engine)
data = file_handler.get_and_cache_npxr("test_group/ds1_f")

# Delete the dataset from the file content dict, it should be available from the cache
Expand Down
Loading