Skip to content

Commit ce1c608

Browse files
authored
Detect inconsistent writes to datasets properties (#633)
* emit warning when trying to export dataset properties while overwriting properties which are newer than the ones most recently seen * update changelog
1 parent e6ba99e commit ce1c608

File tree

4 files changed

+62
-22
lines changed

4 files changed

+62
-22
lines changed

webknossos/Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
1717
### Added
1818

1919
### Changed
20+
- Inconsistent writes to datasets properties (e.g., caused due to multiprocessing) are detected automatically. The warning can be escalated to an exception with `warnings.filterwarnings("error", module="webknossos", message=r"\[WARNING\]")`. [#633](https://github.com/scalableminds/webknossos-libs/pull/633)
2021

2122
### Fixed
2223

webknossos/tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ def error_on_deprecations() -> Generator:
7575
yield
7676

7777

78+
@pytest.fixture(autouse=True, scope="function")
79+
def error_on_warnings() -> Generator:
80+
with warnings.catch_warnings():
81+
warnings.filterwarnings("error", module="webknossos", message=r"\[WARNING\]")
82+
yield
83+
84+
7885
### VCR.py / pytest-recording CONFIG
7986

8087

webknossos/tests/test_dataset.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,3 +1977,24 @@ def test_pickle_view(tmp_path: Path) -> None:
19771977

19781978
# Make sure that the attributes of the MagView (not View) still exist
19791979
assert pickled_mag1.layer is not None
1980+
1981+
1982+
def test_warn_outdated_properties(tmp_path: Path) -> None:
1983+
1984+
ds1 = Dataset(tmp_path / "ds", scale=(1, 1, 1))
1985+
ds2 = Dataset.open(tmp_path / "ds")
1986+
1987+
# Change ds1 and undo it again
1988+
ds1.add_layer("color", COLOR_CATEGORY).add_mag(1)
1989+
ds1.delete_layer("color")
1990+
1991+
# Changing ds2 should work fine, since the properties on disk
1992+
# haven't changed.
1993+
ds2.add_layer("segmentation", SEGMENTATION_CATEGORY, largest_segment_id=1).add_mag(
1994+
1
1995+
)
1996+
1997+
with pytest.raises(UserWarning):
1998+
# Changing ds1 should raise a warning, since ds1
1999+
# does not know about the change in ds2
2000+
ds1.add_layer("color", COLOR_CATEGORY)

webknossos/webknossos/dataset/dataset.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -148,30 +148,24 @@ def __init__(
148148
)
149149

150150
self.path = dataset_path
151-
with open(
152-
self.path / PROPERTIES_FILE_NAME, encoding="utf-8"
153-
) as datasource_properties:
154-
data = json.load(datasource_properties)
155-
156-
self._properties: DatasetProperties = dataset_converter.structure(
157-
data, DatasetProperties
151+
self._properties: DatasetProperties = self._load_properties()
152+
self._last_read_properties = copy.deepcopy(self._properties)
153+
154+
self._layers: Dict[str, Layer] = {}
155+
# construct self.layer
156+
for layer_properties in self._properties.data_layers:
157+
num_channels = _extract_num_channels(
158+
layer_properties.num_channels,
159+
Path(dataset_path),
160+
layer_properties.name,
161+
layer_properties.wkw_resolutions[0].resolution
162+
if len(layer_properties.wkw_resolutions) > 0
163+
else None,
158164
)
165+
layer_properties.num_channels = num_channels
159166

160-
self._layers: Dict[str, Layer] = {}
161-
# construct self.layer
162-
for layer_properties in self._properties.data_layers:
163-
num_channels = _extract_num_channels(
164-
layer_properties.num_channels,
165-
Path(dataset_path),
166-
layer_properties.name,
167-
layer_properties.wkw_resolutions[0].resolution
168-
if len(layer_properties.wkw_resolutions) > 0
169-
else None,
170-
)
171-
layer_properties.num_channels = num_channels
172-
173-
layer = self._initialize_layer_from_properties(layer_properties)
174-
self._layers[layer_properties.name] = layer
167+
layer = self._initialize_layer_from_properties(layer_properties)
168+
self._layers[layer_properties.name] = layer
175169

176170
if dataset_existed_already:
177171
if scale is None:
@@ -825,14 +819,31 @@ def get_or_create(
825819
def __repr__(self) -> str:
826820
return repr("Dataset(%s)" % self.path)
827821

822+
def _load_properties(self) -> DatasetProperties:
823+
with open(
824+
self.path / PROPERTIES_FILE_NAME, encoding="utf-8"
825+
) as datasource_properties:
826+
data = json.load(datasource_properties)
827+
return dataset_converter.structure(data, DatasetProperties)
828+
828829
def _export_as_json(self) -> None:
830+
831+
properties_on_disk = self._load_properties()
832+
833+
if properties_on_disk != self._last_read_properties:
834+
warnings.warn(
835+
"[WARNING] While exporting the dataset's properties, properties were found on disk which are newer than the ones that were seen last time. The properties will be overwritten. This is likely happening because multiple processes changed the metadata of this dataset."
836+
)
837+
829838
with open(self.path / PROPERTIES_FILE_NAME, "w", encoding="utf-8") as outfile:
830839
json.dump(
831840
dataset_converter.unstructure(self._properties),
832841
outfile,
833842
indent=4,
834843
)
835844

845+
self._last_read_properties = copy.deepcopy(self._properties)
846+
836847
def _initialize_layer_from_properties(self, properties: LayerProperties) -> Layer:
837848
if properties.category == COLOR_CATEGORY:
838849
return Layer(self, properties)

0 commit comments

Comments
 (0)