Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

## Version 0.4.0 (in development)

- New xcube rule `no-chunked-coords`. (#29)
- New xcube multi-level dataset rules:
- `ml-dataset-meta`: verifies that a meta info file exists and is consistent
- `ml-dataset-xy`: verifies that the levels have expected spatial resolutions
- `ml-dataset-time`: verifies that the levels have expected time dimension, if any
- `ml-dataset-meta`: verifies that a meta info file exists and is consistent;
- `ml-dataset-xy`: verifies that the levels have expected spatial resolutions;
- `ml-dataset-time`: verifies that the levels have expected time dimension, if any.
- Now supporting xcube multi-level datasets `*.levels`:
- Added xcube plugin processor `"xcube/multi-level-dataset"` that is used
inside the predefined xcube configurations "all" and "recommended".
Expand Down
6 changes: 6 additions & 0 deletions docs/rule-ref.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ Multi-level dataset levels should provide spatial resolutions decreasing by powe

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:

### :material-bug: `no-chunked-coords`

Coordinate variables should not be chunked. Can be used to identify performance issues, where chunked coordinates can cause slow opening of datasets due to the many chunk-fetching requests made to (remote) filesystems with low bandwidth. You can use the `limit` parameter to specify an acceptable number of chunks. Its default is 5.

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:

### :material-bug: `single-grid-mapping`

A single grid mapping shall be used for all spatial data variables of a datacube.
Expand Down
28 changes: 28 additions & 0 deletions tests/plugins/xcube/rules/test_no_chunked_coords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import xarray as xr

from xrlint.plugins.xcube.rules.no_chunked_coords import NoChunkedCoords
from xrlint.testing import RuleTest, RuleTester
from tests.plugins.xcube.helpers import make_cube

# Datasets the rule must accept without reporting anything.
# An empty dataset has no coordinate variables at all.
valid_dataset_0 = xr.Dataset(attrs=dict(title="Empty"))
# No "chunks" encoding is set explicitly on this cube's coordinates here.
valid_dataset_1 = make_cube(360, 180, 3)
valid_dataset_2 = make_cube(90, 45, 20)
# ok, chunk size 4 yields ceil(20 / 4) = 5 chunks, which does not exceed
# the default limit 5 (assumes make_cube's 3rd argument is the time size)
valid_dataset_2.time.encoding["chunks"] = [4]

# Dataset the rule must flag.
invalid_dataset_0 = make_cube(90, 45, 10)
# exceeds default limit 5: chunk size 1 yields ceil(10 / 1) = 10 chunks
invalid_dataset_0.time.encoding["chunks"] = [1]

# Test case definition for the "no-chunked-coords" rule using the
# valid/invalid fixtures declared above.
NoChunkedCoordsTest = RuleTester.define_test(
    "no-chunked-coords",
    NoChunkedCoords,
    valid=[
        RuleTest(dataset=valid_dataset_0),
        RuleTest(dataset=valid_dataset_1),
        RuleTest(dataset=valid_dataset_2),
    ],
    invalid=[
        RuleTest(dataset=invalid_dataset_0),
    ],
)
1 change: 1 addition & 0 deletions tests/plugins/xcube/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def test_rules_complete(self):
"ml-dataset-meta",
"ml-dataset-time",
"ml-dataset-xy",
"no-chunked-coords",
"single-grid-mapping",
"time-naming",
},
Expand Down
1 change: 1 addition & 0 deletions xrlint/plugins/xcube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def export_plugin() -> Plugin:
"xcube/ml-dataset-meta": "error",
"xcube/ml-dataset-time": "warn",
"xcube/ml-dataset-xy": "error",
"xcube/no-chunked-coords": "warn",
"xcube/single-grid-mapping": "error",
"xcube/time-naming": "error",
},
Expand Down
50 changes: 50 additions & 0 deletions xrlint/plugins/xcube/rules/no_chunked_coords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import math

from xrlint.node import DataArrayNode
from xrlint.plugins.xcube.plugin import plugin
from xrlint.rule import RuleContext, RuleOp
from xrlint.util.schema import schema

# Default for the rule's `limit` option: the number of chunks a 1-D
# coordinate variable may have before the rule reports a problem.
DEFAULT_LIMIT = 5


@plugin.define_rule(
    "no-chunked-coords",
    version="1.0.0",
    type="problem",
    description=(
        "Coordinate variables should not be chunked."
        " Can be used to identify performance issues, where chunked coordinates"
        " can cause slow opening of datasets due to the many chunk-fetching"
        " requests made to (remote) filesystems with low bandwidth."
        " You can use the `limit` parameter to specify an acceptable number"
        f" of chunks. Its default is {DEFAULT_LIMIT}."
    ),
    schema=schema(
        "object",
        properties=dict(
            limit=schema(
                "integer",
                minimum=0,
                default=DEFAULT_LIMIT,
                title="Acceptable number of chunks",
            )
        ),
    ),
)
class NoChunkedCoords(RuleOp):
    """Rule that reports 1-D coordinate variables split into too many chunks.

    The number of chunks is derived from the ``"chunks"`` entry of the
    variable's encoding and compared against ``limit``.

    Args:
        limit: Highest number of chunks that is still accepted.
            Defaults to ``DEFAULT_LIMIT``.
    """

    def __init__(self, limit: int = DEFAULT_LIMIT):
        self.limit = limit

    def data_array(self, ctx: RuleContext, node: DataArrayNode) -> None:
        """Check a single data array node and report if it is over-chunked.

        Only arrays that are coordinates of the dataset in ``ctx`` and that
        have exactly one dimension are inspected. Arrays without a usable
        one-element ``"chunks"`` encoding are silently skipped.
        """
        array = node.data_array
        if node.name not in ctx.dataset.coords or array.ndim != 1:
            return

        chunks = array.encoding.get("chunks")
        if not isinstance(chunks, (list, tuple)) or len(chunks) != 1:
            return

        try:
            num_chunks = math.ceil(array.size / chunks[0])
        except (ArithmeticError, TypeError, ValueError):
            # A zero, missing or non-numeric chunk size is a malformed
            # encoding; this rule cannot derive a chunk count from it and
            # must not crash the linter because of it.
            return

        if num_chunks > self.limit:
            ctx.report(
                f"Number of chunks exceeds limit: {num_chunks} > {self.limit}.",
                suggestions=["Combine chunks into one or more larger ones."],
            )
5 changes: 5 additions & 0 deletions xrlint/util/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def schema(
default: Any | None = None,
const: Any | None = None,
enum: list[Any] | None = None,
title: str | None = None,
description: str | None = None,
# "integer", "number"
minimum: int | float | None = None,
maximum: int | float | None = None,
Expand All @@ -43,6 +45,7 @@ def schema(
additionalProperties: bool | None = None,
required: list[str] | None = None,
) -> JsonSchema:
"""Helper function so you have keyword-arguments for creating schemas."""
return {
k: v
for k, v in dict(
Expand All @@ -56,6 +59,8 @@ def schema(
properties=properties,
additionalProperties=False if additionalProperties is False else None,
required=required,
title=title,
description=description,
).items()
if v is not None
}
Expand Down
Loading