Skip to content

Commit 3c60912

Browse files
authored
Merge pull request #50 from bcdev/forman-opening_time
Check access latency
2 parents 200ce2f + a938d47 commit 3c60912

File tree

15 files changed

+186
-23
lines changed

15 files changed

+186
-23
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
### Adjustments and Enhancements
66

7+
- Added a new core rule `access-latency` that can be used to check the
8+
time it takes to open datasets.
9+
710
- Added HTML styling for both CLI output (`--format html`) and rendering
811
of `Result` objects in Jupyter notebooks.
912

docs/rule-ref.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ New rules will be added by upcoming XRLint releases.
55

66
## Core Rules
77

8+
### :material-bug: `access-latency`
9+
10+
Ensure that the time it takes to open a dataset from its source does not exceed a given `threshold` in seconds. The default threshold is `2.5`.
11+
12+
Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:
13+
814
### :material-lightbulb: `content-desc`
915

1016
A dataset should provide information about where the data came from and what has been done to it. This information is mainly for the benefit of human readers. The rule accepts the following configuration parameters:

docs/todo.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
- support zarr >= 3, which we currently do not, only because the test
66
`tests/plugins/xcube/processors/test_mldataset.py` fails
77
(see code TODO)
8-
- validate `RuleConfig.args/kwargs` against `RuleMeta.schema`
9-
(see code TODO)
108
- enhance docs
119
- complete configuration page
1210
- provide guide page
@@ -21,11 +19,11 @@
2119
- add `core` rule that checks recommended use of fill value
2220
- add `xcube` rule that helps to identify chunking issues
2321
- apply rule op args/kwargs validation schema
24-
- measure time it takes to open a dataset and pass time into rule context
25-
so we can write a configurable rule that checks the opening time
2622
- allow outputting suggestions, if any, that are emitted by some rules
2723
- add CLI option
2824
- expand/collapse messages with suggestions in Jupyter notebooks
25+
- validate `RuleConfig.args/kwargs` against `RuleMeta.schema`
26+
(see code TODO)
2927

3028
## Nice to have
3129

tests/_linter/test_rulectx.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,18 @@ class RuleContextImplTest(TestCase):
1313
def test_defaults(self):
1414
config_obj = ConfigObject()
1515
dataset = xr.Dataset()
16-
context = RuleContextImpl(config_obj, dataset, "./ds.zarr", None)
16+
context = RuleContextImpl(config_obj, dataset, "./ds.zarr", None, None)
1717
self.assertIs(config_obj, context.config)
1818
self.assertIs(dataset, context.dataset)
1919
self.assertEqual({}, context.settings)
2020
self.assertEqual("./ds.zarr", context.file_path)
2121
self.assertEqual(None, context.file_index)
22+
self.assertEqual(None, context.access_latency)
2223

2324
def test_report(self):
24-
context = RuleContextImpl(ConfigObject(), xr.Dataset(), "./ds.zarr", None)
25+
context = RuleContextImpl(
26+
ConfigObject(), xr.Dataset(), "./ds.zarr", None, 1.2345
27+
)
2528
with context.use_state(rule_id="no-xxx"):
2629
context.report(
2730
"What the heck do you mean?",
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from unittest import TestCase
2+
3+
import pytest
4+
import xarray as xr
5+
6+
# noinspection PyProtectedMember
7+
from xrlint._linter.rulectx import RuleContextImpl
8+
from xrlint.config import ConfigObject
9+
from xrlint.node import DatasetNode
10+
from xrlint.plugins.core.rules.access_latency import AccessLatency
11+
from xrlint.result import Message
12+
from xrlint.rule import RuleExit
13+
14+
valid_dataset_0 = xr.Dataset()
15+
16+
invalid_dataset_0 = xr.Dataset()
17+
18+
19+
class OpeningTimeTest(TestCase):
20+
@classmethod
21+
def invoke_op(
22+
cls, dataset: xr.Dataset, access_latency: float, threshold: float | None = None
23+
):
24+
ctx = RuleContextImpl(
25+
config=ConfigObject(),
26+
dataset=dataset,
27+
file_path="test.zarr",
28+
file_index=None,
29+
access_latency=access_latency,
30+
)
31+
node = DatasetNode(
32+
path="dataset",
33+
parent=None,
34+
dataset=ctx.dataset,
35+
)
36+
rule_op = (
37+
AccessLatency(threshold=threshold)
38+
if threshold is not None
39+
else AccessLatency()
40+
)
41+
with pytest.raises(RuleExit):
42+
rule_op.validate_dataset(ctx, node)
43+
return ctx
44+
45+
def test_valid(self):
46+
ctx = self.invoke_op(xr.Dataset(), 1.0, threshold=None)
47+
self.assertEqual([], ctx.messages)
48+
49+
ctx = self.invoke_op(xr.Dataset(), 1.0, threshold=1.0)
50+
self.assertEqual([], ctx.messages)
51+
52+
def test_invalid(self):
53+
ctx = self.invoke_op(xr.Dataset(), 3.16, threshold=None)
54+
self.assertEqual(
55+
[
56+
Message(
57+
message="Access latency exceeds threshold: 3.2 > 2.5 seconds.",
58+
node_path="dataset",
59+
severity=2,
60+
)
61+
],
62+
ctx.messages,
63+
)
64+
65+
ctx = self.invoke_op(xr.Dataset(), 0.2032, threshold=0.1)
66+
self.assertEqual(
67+
[
68+
Message(
69+
message="Access latency exceeds threshold: 0.2 > 0.1 seconds.",
70+
node_path="dataset",
71+
severity=2,
72+
)
73+
],
74+
ctx.messages,
75+
)

tests/plugins/core/test_plugin.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@ def test_rules_complete(self):
88
plugin = export_plugin()
99
self.assertEqual(
1010
{
11+
"access-latency",
1112
"content-desc",
1213
"conventions",
1314
"coords-for-dims",
1415
"grid-mappings",
1516
"lat-coordinate",
1617
"lon-coordinate",
1718
"no-empty-attrs",
18-
"time-coordinate",
1919
"no-empty-chunks",
20+
"time-coordinate",
2021
"var-desc",
2122
"var-flags",
2223
"var-units",

xrlint/_linter/rulectx.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def __init__(
1717
dataset: xr.Dataset,
1818
file_path: str,
1919
file_index: int | None,
20+
access_latency: float | None,
2021
):
2122
assert config is not None
2223
assert dataset is not None
@@ -26,6 +27,7 @@ def __init__(
2627
self._dataset = dataset
2728
self._file_path = file_path
2829
self._file_index = file_index
30+
self._access_latency = access_latency
2931
self.messages: list[Message] = []
3032
self.rule_id: str | None = None
3133
self.severity: Literal[1, 2] = SEVERITY_ERROR
@@ -51,6 +53,10 @@ def file_path(self) -> str:
5153
def file_index(self) -> int | None:
5254
return self._file_index
5355

56+
@property
57+
def access_latency(self) -> float | None:
58+
return self._access_latency
59+
5460
def report(
5561
self,
5662
message: str,

xrlint/_linter/validate.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import time
12
from typing import Any
23

34
import xarray as xr
@@ -15,7 +16,7 @@ def validate_dataset(config_obj: ConfigObject, dataset: Any, file_path: str):
1516
assert dataset is not None
1617
assert isinstance(file_path, str)
1718
if isinstance(dataset, xr.Dataset):
18-
messages = _validate_dataset(config_obj, dataset, file_path, None)
19+
messages = _validate_dataset(config_obj, dataset, file_path, None, None)
1920
else:
2021
messages = _open_and_validate_dataset(config_obj, dataset, file_path)
2122
return Result.new(config_object=config_obj, messages=messages, file_path=file_path)
@@ -26,12 +27,15 @@ def _validate_dataset(
2627
dataset: xr.Dataset,
2728
file_path: str,
2829
file_index: int | None,
30+
access_latency: float | None,
2931
) -> list[Message]:
3032
assert isinstance(config_obj, ConfigObject)
3133
assert isinstance(dataset, xr.Dataset)
3234
assert isinstance(file_path, str)
3335

34-
context = RuleContextImpl(config_obj, dataset, file_path, file_index)
36+
context = RuleContextImpl(
37+
config_obj, dataset, file_path, file_index, access_latency
38+
)
3539
for rule_id, rule_config in config_obj.rules.items():
3640
with context.use_state(rule_id=rule_id):
3741
apply_rule(context, rule_id, rule_config)
@@ -48,24 +52,30 @@ def _open_and_validate_dataset(
4852
opener_options = config_obj.opener_options or {}
4953
if config_obj.processor is not None:
5054
processor_op = config_obj.get_processor_op(config_obj.processor)
55+
t0 = time.time()
5156
try:
5257
ds_path_list = processor_op.preprocess(file_path, opener_options)
5358
except (OSError, ValueError, TypeError) as e:
5459
return [new_fatal_message(str(e))]
60+
access_latency = time.time() - t0
5561
return processor_op.postprocess(
5662
[
57-
_validate_dataset(config_obj, ds, path, i)
63+
_validate_dataset(config_obj, ds, path, i, access_latency)
5864
for i, (ds, path) in enumerate(ds_path_list)
5965
],
6066
file_path,
6167
)
6268
else:
69+
t0 = time.time()
6370
try:
6471
dataset = _open_dataset(ds_source, opener_options, file_path)
6572
except (OSError, ValueError, TypeError) as e:
6673
return [new_fatal_message(str(e))]
74+
access_latency = time.time() - t0
6775
with dataset:
68-
return _validate_dataset(config_obj, dataset, file_path, None)
76+
return _validate_dataset(
77+
config_obj, dataset, file_path, None, access_latency
78+
)
6979

7080

7181
def _open_dataset(

xrlint/plugins/core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def export_plugin() -> Plugin:
1212
{
1313
"name": "recommended",
1414
"rules": {
15+
"access-latency": "warn",
1516
"content-desc": "warn",
1617
"conventions": "warn",
1718
"coords-for-dims": "error",

xrlint/plugins/core/plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from xrlint.constants import CORE_PLUGIN_NAME, CORE_DOCS_URL
1+
from xrlint.constants import CORE_DOCS_URL, CORE_PLUGIN_NAME
22
from xrlint.plugin import new_plugin
33
from xrlint.version import version
44

0 commit comments

Comments
 (0)