4 changes: 4 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -65,6 +65,10 @@ This document explains the changes made to Iris for this release
   using :meth:`~iris.cube.Cube.aggregated_by` or :meth:`~iris.cube.Cube.collapsed`.
   (:issue:`6473`, :pull:`6706`, :pull:`6719`)

+#. `@trexfeathers`_ protected the NetCDF saving code from a transient I/O
+   error caused by bad synchronisation between Python-layer and HDF-layer
+   file locking on certain filesystems. (:pull:`6760`)
+

💣 Incompatible Changes
=======================
20 changes: 19 additions & 1 deletion lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -10,6 +10,7 @@

from abc import ABC
from threading import Lock
+from time import sleep
import typing

import netCDF4
@@ -386,7 +387,24 @@ def __setitem__(self, keys, array_data):
        with _GLOBAL_NETCDF4_LOCK:
            dataset = None
            try:
-                dataset = netCDF4.Dataset(self.path, "r+")
+                # Even when fully serialised - no parallelism - HDF still
+                # occasionally fails to acquire the file. This is despite all
+                # Python locks being available at the expected moments, and
+                # the file reporting as closed. During testing, the 2nd retry
+                # always succeeded. This is likely caused by HDF-level locking
+                # running on a different timescale to Python-level locking -
+                # i.e. sometimes Python has released its locks but HDF still
+                # has not. Thought to be filesystem-dependent; further
+                # investigation is needed.
+                for attempt in range(5):
+                    try:
+                        dataset = netCDF4.Dataset(self.path, "r+")
+                        break
+                    except OSError:
+                        if attempt < 4:
+                            sleep(0.1)
+                        else:
+                            raise
                var = dataset.variables[self.varname]
                var[keys] = array_data
            finally:
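In isolation, the retry added above behaves like the helper below: a minimal sketch assuming plain netCDF4 with no Iris locking, where the name `open_with_retries` and its parameters are illustrative, not part of the Iris API.

```python
from time import sleep

import netCDF4


def open_with_retries(path, mode="r+", attempts=5, delay=0.1):
    """Open a netCDF4 Dataset, retrying when a transient OSError occurs.

    Illustrative sketch only - mirrors the pattern in
    NetCDFWriteProxy.__setitem__ but is not part of Iris.
    """
    for attempt in range(attempts):
        try:
            return netCDF4.Dataset(path, mode)
        except OSError:
            if attempt < attempts - 1:
                # Give HDF-level locks time to clear before retrying.
                sleep(delay)
            else:
                # Out of retries: surface the original error.
                raise
```

A fixed five attempts at 0.1 s intervals keeps the worst-case added latency to roughly 0.5 s, which fits the observation above that the second attempt always sufficed during testing.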
5 changes: 4 additions & 1 deletion lib/iris/tests/test_coding_standards.py
@@ -15,7 +15,6 @@
import pytest

import iris
-from iris.fileformats.netcdf import _thread_safe_nc
from iris.tests import system_test

LICENSE_TEMPLATE = """# Copyright Iris contributors
@@ -44,6 +43,9 @@ def test_netcdf4_import():
    # Please avoid including these phrases in any comments/strings throughout
    # Iris (e.g. use "from the netCDF4 library" instead) - this allows the
    # below search to remain quick and simple.
+    from iris.fileformats.netcdf import _thread_safe_nc
+    from iris.tests.unit.fileformats.netcdf._thread_safe_nc import test_NetCDFWriteProxy
+
    import_strings = ("import netCDF4", "from netCDF4")

    files_including_import = []
@@ -55,6 +57,7 @@

    expected = [
        Path(_thread_safe_nc.__file__),
+        Path(test_NetCDFWriteProxy.__file__),
        Path(system_test.__file__),
        Path(__file__),
    ]
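The scan itself is elided between the hunks above; under the assumption that it is a straightforward substring search of the Iris source tree, it would look roughly like this sketch (`files_importing_netcdf4` is an illustrative name, not the test's actual code):

```python
from pathlib import Path

import iris

IMPORT_STRINGS = ("import netCDF4", "from netCDF4")


def files_importing_netcdf4() -> list[Path]:
    """Return every Iris module containing a direct netCDF4 import."""
    package_root = Path(iris.__file__).parent
    hits = []
    for path in sorted(package_root.rglob("*.py")):
        if any(s in path.read_text() for s in IMPORT_STRINGS):
            hits.append(path)
    return hits
```

Any file found this way must appear in `expected`, which is why the new test module (which imports the netCDF4 library directly) is added to the allow-list above.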
8 changes: 8 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/__init__.py
@@ -0,0 +1,8 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Unit tests for the :mod:`iris.fileformats.netcdf._thread_safe_nc` module.
+
+Not required for a private module, but useful for specific checks.
+"""
76 changes: 76 additions & 0 deletions lib/iris/tests/unit/fileformats/netcdf/_thread_safe_nc/test_NetCDFWriteProxy.py
@@ -0,0 +1,76 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Unit tests for :class:`iris.fileformats.netcdf._thread_safe_nc.NetCDFWriteProxy`."""
+
+from threading import Lock
+
+import netCDF4 as nc
+from netCDF4 import Dataset as DatasetOriginal
+import pytest
+
+from iris.fileformats.netcdf._thread_safe_nc import DatasetWrapper, NetCDFWriteProxy
+
+
+@pytest.fixture
+def dataset_path(tmp_path):
+    return tmp_path / "test.nc"
+
+
+@pytest.fixture
+def netcdf_variable(dataset_path):
+    dataset = DatasetWrapper(dataset_path, "w")
+    _ = dataset.createDimension("dim1", 1)
+    variable = dataset.createVariable(
+        "test_var",
+        "f4",
+        ("dim1",),
+    )
+    return variable
+
+
+@pytest.fixture
+def write_proxy(netcdf_variable) -> NetCDFWriteProxy:
+    dataset = netcdf_variable.group()
+    proxy = NetCDFWriteProxy(
+        filepath=dataset.filepath(),
+        cf_var=netcdf_variable,
+        file_write_lock=Lock(),
+    )
+    dataset.close()
+    return proxy
+
+
+class UnreliableDatasetMaker:
+    """A mock operation that returns a Dataset, but fails the first time it is called.
+
+    This simulates non-deterministic HDF locking errors which are difficult to
+    debug at the Python layer - pending further investigation.
+    """
+
+    def __init__(self):
+        self.call_count = 0
+
+    def __call__(self, *args, **kwargs) -> nc.Dataset:
+        self.call_count += 1
+        if self.call_count < 2:
+            raise OSError("Simulated non-deterministic HDF locking error")
+        else:
+            return DatasetOriginal(*args, **kwargs)
+
+
Review comment from the Contributor Author: I should probably also have test coverage showing that Iris will bail after 5 attempts

+def test_handle_hdf_locking_error(dataset_path, monkeypatch, write_proxy):
"""Test that NetCDFWriteProxy can handle non-deterministic HDF locking errors."""
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
dataset = nc.Dataset(write_proxy.path, "r+")
var = dataset.variables[write_proxy.varname]
var[0] = 1.0

# Reset.
monkeypatch.setattr(nc, "Dataset", UnreliableDatasetMaker())
try:
write_proxy[0] = 1.0
except OSError:
pytest.fail("NetCDFWriteProxy failed to handle HDF locking error")
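The extra coverage suggested in the review comment above could look like the following sketch: a hypothetical always-failing Dataset factory (not in the PR) that should exhaust all five attempts and let the OSError propagate.

```python
class AlwaysFailingDatasetMaker:
    """Hypothetical helper: a Dataset factory that fails on every call."""

    def __call__(self, *args, **kwargs):
        raise OSError("Simulated non-deterministic HDF locking error")


def test_bails_after_max_attempts(monkeypatch, write_proxy):
    """Sketch: the proxy should re-raise once all 5 attempts have failed."""
    monkeypatch.setattr(nc, "Dataset", AlwaysFailingDatasetMaker())
    with pytest.raises(OSError, match="Simulated non-deterministic HDF locking error"):
        write_proxy[0] = 1.0
```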