3 changes: 3 additions & 0 deletions CHANGES.md
@@ -4,6 +4,9 @@

### Adjustments and Enhancements

- Added a new core rule `access-latency` that can be used to check the
time it takes to open datasets.

- Added HTML styling for both CLI output (`--format html`) and rendering
of `Result` objects in Jupyter notebooks.

6 changes: 6 additions & 0 deletions docs/rule-ref.md
@@ -5,6 +5,12 @@ New rules will be added by upcoming XRLint releases.

## Core Rules

### :material-bug: `access-latency`

Ensure that the time it takes to open a dataset from its source does not exceed a given `threshold` in seconds. The default threshold is `2.5`.

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:
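
For illustration, a configuration sketch that enables the rule with a custom threshold. The severity-plus-options list form is an assumption here (mirroring `RuleConfig.args/kwargs`); a plain severity string such as `"warn"` is what the built-in `recommended` config uses.

```python
# Hypothetical snippet of an XRLint configuration object; the
# options-list form ["error", {...}] is assumed, not confirmed.
config = {
    "rules": {
        # Fail if opening the dataset takes longer than 5 seconds.
        "access-latency": ["error", {"threshold": 5.0}],
    }
}
```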

### :material-lightbulb: `content-desc`

A dataset should provide information about where the data came from and what has been done to it. This information is mainly for the benefit of human readers. The rule accepts the following configuration parameters:
6 changes: 2 additions & 4 deletions docs/todo.md
@@ -5,8 +5,6 @@
- support zarr >= 3, which we currently do not, only because the test
`tests/plugins/xcube/processors/test_mldataset.py` fails
(see code TODO)
- validate `RuleConfig.args/kwargs` against `RuleMeta.schema`
(see code TODO)
- enhance docs
- complete configuration page
- provide guide page
@@ -21,11 +19,11 @@
- add `core` rule checks recommended use of fill value
- add `xcube` rule that helps to identify chunking issues
- apply rule op args/kwargs validation schema
- measure time it takes to open a dataset and pass time into rule context
so we can write a configurable rule that checks the opening time
- allow outputting suggestions, if any, that are emitted by some rules
- add CLI option
- expand/collapse messages with suggestions in Jupyter notebooks
- validate `RuleConfig.args/kwargs` against `RuleMeta.schema`
(see code TODO)

## Nice to have

7 changes: 5 additions & 2 deletions tests/_linter/test_rulectx.py
@@ -13,15 +13,18 @@ class RuleContextImplTest(TestCase):
def test_defaults(self):
config_obj = ConfigObject()
dataset = xr.Dataset()
context = RuleContextImpl(config_obj, dataset, "./ds.zarr", None)
context = RuleContextImpl(config_obj, dataset, "./ds.zarr", None, None)
self.assertIs(config_obj, context.config)
self.assertIs(dataset, context.dataset)
self.assertEqual({}, context.settings)
self.assertEqual("./ds.zarr", context.file_path)
self.assertEqual(None, context.file_index)
self.assertEqual(None, context.access_latency)

def test_report(self):
context = RuleContextImpl(ConfigObject(), xr.Dataset(), "./ds.zarr", None)
context = RuleContextImpl(
ConfigObject(), xr.Dataset(), "./ds.zarr", None, 1.2345
)
with context.use_state(rule_id="no-xxx"):
context.report(
"What the heck do you mean?",
75 changes: 75 additions & 0 deletions tests/plugins/core/rules/test_access_latency.py
@@ -0,0 +1,75 @@
from unittest import TestCase

import pytest
import xarray as xr

# noinspection PyProtectedMember
from xrlint._linter.rulectx import RuleContextImpl
from xrlint.config import ConfigObject
from xrlint.node import DatasetNode
from xrlint.plugins.core.rules.access_latency import AccessLatency
from xrlint.result import Message
from xrlint.rule import RuleExit

valid_dataset_0 = xr.Dataset()

invalid_dataset_0 = xr.Dataset()


class AccessLatencyTest(TestCase):
@classmethod
def invoke_op(
cls, dataset: xr.Dataset, access_latency: float, threshold: float | None = None
):
ctx = RuleContextImpl(
config=ConfigObject(),
dataset=dataset,
file_path="test.zarr",
file_index=None,
access_latency=access_latency,
)
node = DatasetNode(
path="dataset",
parent=None,
dataset=ctx.dataset,
)
rule_op = (
AccessLatency(threshold=threshold)
if threshold is not None
else AccessLatency()
)
with pytest.raises(RuleExit):
rule_op.validate_dataset(ctx, node)
return ctx

def test_valid(self):
ctx = self.invoke_op(xr.Dataset(), 1.0, threshold=None)
self.assertEqual([], ctx.messages)

ctx = self.invoke_op(xr.Dataset(), 1.0, threshold=1.0)
self.assertEqual([], ctx.messages)

def test_invalid(self):
ctx = self.invoke_op(xr.Dataset(), 3.16, threshold=None)
self.assertEqual(
[
Message(
message="Access latency exceeds threshold: 3.2 > 2.5 seconds.",
node_path="dataset",
severity=2,
)
],
ctx.messages,
)

ctx = self.invoke_op(xr.Dataset(), 0.2032, threshold=0.1)
self.assertEqual(
[
Message(
message="Access latency exceeds threshold: 0.2 > 0.1 seconds.",
node_path="dataset",
severity=2,
)
],
ctx.messages,
)
3 changes: 2 additions & 1 deletion tests/plugins/core/test_plugin.py
@@ -8,15 +8,16 @@ def test_rules_complete(self):
plugin = export_plugin()
self.assertEqual(
{
"access-latency",
"content-desc",
"conventions",
"coords-for-dims",
"grid-mappings",
"lat-coordinate",
"lon-coordinate",
"no-empty-attrs",
"time-coordinate",
"no-empty-chunks",
"time-coordinate",
"var-desc",
"var-flags",
"var-units",
6 changes: 6 additions & 0 deletions xrlint/_linter/rulectx.py
@@ -17,6 +17,7 @@ def __init__(
dataset: xr.Dataset,
file_path: str,
file_index: int | None,
access_latency: float | None,
):
assert config is not None
assert dataset is not None
@@ -26,6 +27,7 @@
self._dataset = dataset
self._file_path = file_path
self._file_index = file_index
self._access_latency = access_latency
self.messages: list[Message] = []
self.rule_id: str | None = None
self.severity: Literal[1, 2] = SEVERITY_ERROR
@@ -51,6 +53,10 @@ def file_path(self) -> str:
def file_index(self) -> int | None:
return self._file_index

@property
def access_latency(self) -> float | None:
return self._access_latency

def report(
self,
message: str,
18 changes: 14 additions & 4 deletions xrlint/_linter/validate.py
@@ -1,3 +1,4 @@
import time
from typing import Any

import xarray as xr
@@ -15,7 +16,7 @@ def validate_dataset(config_obj: ConfigObject, dataset: Any, file_path: str):
assert dataset is not None
assert isinstance(file_path, str)
if isinstance(dataset, xr.Dataset):
messages = _validate_dataset(config_obj, dataset, file_path, None)
messages = _validate_dataset(config_obj, dataset, file_path, None, None)
else:
messages = _open_and_validate_dataset(config_obj, dataset, file_path)
return Result.new(config_object=config_obj, messages=messages, file_path=file_path)
@@ -26,12 +27,15 @@ def _validate_dataset(
dataset: xr.Dataset,
file_path: str,
file_index: int | None,
access_latency: float | None,
) -> list[Message]:
assert isinstance(config_obj, ConfigObject)
assert isinstance(dataset, xr.Dataset)
assert isinstance(file_path, str)

context = RuleContextImpl(config_obj, dataset, file_path, file_index)
context = RuleContextImpl(
config_obj, dataset, file_path, file_index, access_latency
)
for rule_id, rule_config in config_obj.rules.items():
with context.use_state(rule_id=rule_id):
apply_rule(context, rule_id, rule_config)
@@ -48,24 +52,30 @@ def _open_and_validate_dataset(
opener_options = config_obj.opener_options or {}
if config_obj.processor is not None:
processor_op = config_obj.get_processor_op(config_obj.processor)
t0 = time.time()
try:
ds_path_list = processor_op.preprocess(file_path, opener_options)
except (OSError, ValueError, TypeError) as e:
return [new_fatal_message(str(e))]
access_latency = time.time() - t0
return processor_op.postprocess(
[
_validate_dataset(config_obj, ds, path, i)
_validate_dataset(config_obj, ds, path, i, access_latency)
for i, (ds, path) in enumerate(ds_path_list)
],
file_path,
)
else:
t0 = time.time()
try:
dataset = _open_dataset(ds_source, opener_options, file_path)
except (OSError, ValueError, TypeError) as e:
return [new_fatal_message(str(e))]
access_latency = time.time() - t0
with dataset:
return _validate_dataset(config_obj, dataset, file_path, None)
return _validate_dataset(
config_obj, dataset, file_path, None, access_latency
)


def _open_dataset(
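
The timing change in `xrlint/_linter/validate.py` above is a plain wall-clock measurement around the open (or preprocess) call, with the elapsed time handed to the rule context as `access_latency`. A minimal sketch of the same pattern, using a hypothetical `timed_open` helper and `xr.open_dataset` as a stand-in for the configured opener:

```python
import time

import xarray as xr


def timed_open(source: str) -> tuple[xr.Dataset, float]:
    """Open a dataset and return it with the access latency in seconds."""
    t0 = time.time()
    dataset = xr.open_dataset(source)  # stand-in for the configured opener
    return dataset, time.time() - t0
```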
1 change: 1 addition & 0 deletions xrlint/plugins/core/__init__.py
@@ -12,6 +12,7 @@ def export_plugin() -> Plugin:
{
"name": "recommended",
"rules": {
"access-latency": "warn",
"content-desc": "warn",
"conventions": "warn",
"coords-for-dims": "error",
2 changes: 1 addition & 1 deletion xrlint/plugins/core/plugin.py
@@ -1,4 +1,4 @@
from xrlint.constants import CORE_PLUGIN_NAME, CORE_DOCS_URL
from xrlint.constants import CORE_DOCS_URL, CORE_PLUGIN_NAME
from xrlint.plugin import new_plugin
from xrlint.version import version

46 changes: 46 additions & 0 deletions xrlint/plugins/core/rules/access_latency.py
@@ -0,0 +1,46 @@
# Copyright © 2025 Brockmann Consult GmbH.
# This software is distributed under the terms and conditions of the
# MIT license (https://mit-license.org/).

from typing import Final

from xrlint.node import DatasetNode
from xrlint.plugins.core.plugin import plugin
from xrlint.rule import RuleContext, RuleExit, RuleOp
from xrlint.util.formatting import format_count
from xrlint.util.schema import schema

DEFAULT_THRESHOLD: Final = 2.5 # seconds


@plugin.define_rule(
"access-latency",
version="1.0.0",
description=(
"Ensure that the time it takes to open a dataset from its source"
" does not exceed a given `threshold` in seconds."
f" The default threshold is `{DEFAULT_THRESHOLD}`."
),
schema=schema(
"object",
properties={
"threshold": schema(
"number",
exclusiveMinimum=0,
default=DEFAULT_THRESHOLD,
title="Threshold time in seconds",
)
},
),
)
class AccessLatency(RuleOp):
def __init__(self, threshold: float = DEFAULT_THRESHOLD):
self.threshold = threshold

def validate_dataset(self, ctx: RuleContext, node: DatasetNode) -> None:
if ctx.access_latency is not None and ctx.access_latency > self.threshold:
ctx.report(
f"Access latency exceeds threshold: {ctx.access_latency:.1f}"
f" > {format_count(self.threshold, 'second')}."
)
raise RuleExit
2 changes: 1 addition & 1 deletion xrlint/result.py
@@ -3,8 +3,8 @@
from typing import TYPE_CHECKING, Literal, Union

from xrlint.constants import (
CORE_PLUGIN_NAME,
CORE_DOCS_URL,
CORE_PLUGIN_NAME,
MISSING_DATASET_FILE_PATH,
SEVERITY_ERROR,
SEVERITY_WARN,
23 changes: 15 additions & 8 deletions xrlint/rule.py
@@ -38,6 +38,13 @@ def settings(self) -> dict[str, Any]:
def dataset(self) -> xr.Dataset:
"""The current dataset."""

@property
@abstractmethod
def access_latency(self) -> float | None:
"""The time in seconds that it took for opening the dataset.
`None` if the dataset has not been opened from `file_path`.
"""

@abstractmethod
def report(
self,
@@ -75,44 +82,44 @@ class RuleExit(Exception):
class RuleOp(ABC):
"""Define the specific rule validation operations."""

def validate_dataset(self, context: RuleContext, node: DatasetNode) -> None:
def validate_dataset(self, ctx: RuleContext, node: DatasetNode) -> None:
"""Validate the given dataset node.

Args:
context: The current rule context.
ctx: The current rule context.
node: The dataset node.

Raises:
RuleExit: to exit rule logic and further node traversal
"""

def validate_variable(self, context: RuleContext, node: VariableNode) -> None:
def validate_variable(self, ctx: RuleContext, node: VariableNode) -> None:
"""Validate the given data array (variable) node.

Args:
context: The current rule context.
ctx: The current rule context.
node: The data array (variable) node.

Raises:
RuleExit: to exit rule logic and further node traversal
"""

def validate_attrs(self, context: RuleContext, node: AttrsNode) -> None:
def validate_attrs(self, ctx: RuleContext, node: AttrsNode) -> None:
"""Validate the given attributes node.

Args:
context: The current rule context.
ctx: The current rule context.
node: The attributes node.

Raises:
RuleExit: to exit rule logic and further node traversal
"""

def validate_attr(self, context: RuleContext, node: AttrNode) -> None:
def validate_attr(self, ctx: RuleContext, node: AttrNode) -> None:
"""Validate the given attribute node.

Args:
context: The current rule context.
ctx: The current rule context.
node: The attribute node.

Raises:
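
To round off, a minimal sketch of a custom rule operation that consumes the new `access_latency` property through the renamed `ctx` parameter. The class name, message, and behavior below are hypothetical; registration via a plugin's `define_rule` is omitted.

```python
from xrlint.node import DatasetNode
from xrlint.rule import RuleContext, RuleExit, RuleOp


class ReportAccessLatency(RuleOp):
    """Hypothetical rule op that simply reports the measured access latency."""

    def validate_dataset(self, ctx: RuleContext, node: DatasetNode) -> None:
        if ctx.access_latency is None:
            # The dataset was passed in directly, not opened from ctx.file_path.
            raise RuleExit
        ctx.report(f"Dataset opened in {ctx.access_latency:.2f} seconds.")
        # No further traversal needed; the check is per-dataset.
        raise RuleExit
```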