diff --git a/.gitignore b/.gitignore index 7e9fd1e..a90bd65 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # xrlint /xrlint_config.* +/notebooks/xrlint_config.* # Logs *.log diff --git a/CHANGES.md b/CHANGES.md index fcf7e1e..77be14c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,9 +2,12 @@ ## Version 0.1.0 (in development) +- Added CLI option `--print-config FILE`, see same option in ESLint - XRLint CLI now outputs single results immediately to console, instead only after all results have been collected. - +- Refactored and renamed `CliEngine` into `XRLint`. Documented the class. +- `new_linter()` now uses a config name arg instead of a bool arg. +- Split example notebook into two ## Early development snapshots diff --git a/docs/api.md b/docs/api.md index 2ac673c..52b60d9 100644 --- a/docs/api.md +++ b/docs/api.md @@ -2,6 +2,10 @@ All described objects can be imported from the `xrlint.all` module. +## Class `XRLint` + +::: xrlint.cli.engine.XRLint + ## Function `new_linter()` ::: xrlint.linter.new_linter diff --git a/docs/cli.md b/docs/cli.md index a3266c4..6fcb212 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -8,23 +8,27 @@ Usage: xrlint [OPTIONS] [FILES]... Validate the given dataset FILES. - Reads configuration from `xrlint.config.*` if file exists and unless `--no- - default-config` is set or `--config PATH` is provided. Then validates each - dataset in FILES against the configuration. The validation result is dumped - to standard output if not otherwise stated by `--output-file PATH`. The - output format is `simple`. Other inbuilt formats are `json` and `html` which - can by setting the `--format NAME` option. + Reads configuration from `./xrlint_config.*` if such file exists and unless + `--no-config-lookup` is set or `--config` is provided. Then validates each + dataset in FILES against the configuration. The default dataset patterns are + `**/*.zarr` and `**/*.nc`. FILES may comprise also directories. 
If a + directory is not matched by any file pattern, it will be traversed + recursively. The validation result is dumped to standard output if not + otherwise stated by `--output-file`. The output format is `simple` by + default. Other inbuilt formats are `json` and `html` which you can specify + using the `--format` option. Options: - --no-default-config Disable use of default configuration from + --no-config-lookup Disable use of default configuration from xrlint_config.* - -c, --config PATH Use this configuration, overriding xrlint_config.* + -c, --config FILE Use this configuration, overriding xrlint_config.* config options if present + --print-config FILE Print the configuration for the given file --plugin MODULE Specify plugins. MODULE is the name of Python module that defines an 'export_plugin()' function. --rule SPEC Specify rules. SPEC must have format ': ' (note the space character). - -o, --output-file PATH Specify file to write report to + -o, --output-file FILE Specify file to write report to -f, --format NAME Use a specific output format - default: simple --color / --no-color Force enabling/disabling of color --max-warnings COUNT Number of warnings to trigger nonzero exit code - @@ -32,4 +36,5 @@ Options: --init Write initial configuration file and exit. --version Show the version and exit. --help Show this message and exit. 
+ ``` diff --git a/docs/start.md b/docs/start.md index 32fe2e2..a513c23 100644 --- a/docs/start.md +++ b/docs/start.md @@ -105,6 +105,6 @@ import xrlint.all as xrl test_ds = xr.Dataset(attrs={"title": "Test Dataset"}) -linter = xrl.new_linter(recommended=True) +linter = xrl.new_linter("recommended") linter.verify_dataset(test_ds) ``` diff --git a/docs/todo.md b/docs/todo.md index 6ebcd19..2c4599e 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -13,16 +13,11 @@ - project logo - if configuration for given FILE is empty, report an error, see TODO in CLI main tests -- rename `xrlint.cli.CliEngine` into `xrlint.cli.XRLint` - (with similar API as the `ESLint` class) and export it - from `xrlint.all`. Value of `FILES` should be passed to - `verify_datasets()` methods. - use `RuleMeta.docs_url` in formatters to create links - implement xarray backend for xcube 'levels' format so can validate them too - add some more tests so we reach 99% coverage - support rule op args/kwargs schema validation -- support CLI option `--print-config FILE`, see ESLint - Support `RuleTest.expected`, it is currently unused ## Nice to have diff --git a/notebooks/mkdataset.py b/notebooks/mkdataset.py new file mode 100644 index 0000000..62b7413 --- /dev/null +++ b/notebooks/mkdataset.py @@ -0,0 +1,64 @@ +import numpy as np +import xarray as xr + +nx = 2 +ny = 3 +nt = 4 + + +def make_dataset() -> xr.Dataset: + """Create a dataset that passes xrlint core rules.""" + + return xr.Dataset( + attrs=dict(title="SST-Climatology Subset"), + coords={ + "x": xr.DataArray( + np.linspace(-180, 180, nx), + dims="x", + attrs={"units": "degrees"} + ), + "y": xr.DataArray( + np.linspace(-90, 90, ny), + dims="y", + attrs={"units": "degrees"} + ), + "time": xr.DataArray( + [2010 + y for y in range(nt)], + dims="time", + attrs={"units": "years"} + ), + "spatial_ref": xr.DataArray( + 0, + attrs={ + "grid_mapping_name": "latitude_longitude", + "semi_major_axis": 6371000.0, + "inverse_flattening": 0, + }, + ), + }, + 
data_vars={ + "sst": xr.DataArray( + np.random.random((nt, ny, nx)), + dims=["time", "y", "x"], + attrs={"units": "kelvin", "grid_mapping": "spatial_ref"} + ), + "sst_anomaly": xr.DataArray( + np.random.random((nt, ny, nx)), + dims=["time", "y", "x"], + attrs={"units": "kelvin", "grid_mapping": "spatial_ref"} + ) + }, + ) + + +def make_dataset_with_issues() -> xr.Dataset: + """Create a dataset that produces issues with xrlint core rules.""" + invalid_ds = make_dataset() + invalid_ds.attrs = {} + invalid_ds.sst.attrs["units"] = 1 + invalid_ds["sst_avg"] = xr.DataArray( + np.random.random((nx, ny)), + dims=["x", "y"], + attrs={"units": "kelvin"} + ) + return invalid_ds diff --git a/notebooks/xrlint-api-demo.ipynb b/notebooks/xrlint-api-demo.ipynb deleted file mode 100644 index 65355fd..0000000 --- a/notebooks/xrlint-api-demo.ipynb +++ /dev/null @@ -1,694 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import xarray as xr\n", - "import xrlint.all as xrl" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# XRLint" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "### Basic API Usage" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.0.3'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xrl.version" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "nx = 2\n", - "ny = 3\n", - "nt = 4\n", - "ds = xr.Dataset(\n", - " attrs=dict(title=\"SST-Climatology Subset\"),\n", - " coords={\n", - " \"x\": xr.DataArray(\n", - " np.linspace(-180, 180, nx),\n", - " dims=\"x\", \n", - " attrs={\"units\": \"degrees\"}\n", - " ),\n", - " \"y\": xr.DataArray(\n", - " 
np.linspace(-90, 90, ny),\n", - " dims=\"y\", \n", - " attrs={\"units\": \"degrees\"}\n", - " ),\n", - " \"time\": xr.DataArray(\n", - " [2010 + y for y in range(nt)], \n", - " dims=\"time\", \n", - " attrs={\"units\": \"years\"}\n", - " ),\n", - " \"spatial_ref\": xr.DataArray(\n", - " 0,\n", - " attrs={\n", - " \"grid_mapping_name\": \"latitude_longitude\",\n", - " \"semi_major_axis\": 6371000.0,\n", - " \"inverse_flattening\": 0,\n", - " },\n", - " ),\n", - " },\n", - " data_vars={\n", - " \"sst\": xr.DataArray(\n", - " np.random.random((nt, ny, nx)), \n", - " dims=[\"time\", \"y\", \"x\"], \n", - " attrs={\"units\": \"kelvin\", \"grid_mapping\": \"spatial_ref\"}\n", - " ),\n", - " \"sst_anomaly\": xr.DataArray(\n", - " np.random.random((nt, ny, nx)), \n", - " dims=[\"time\", \"y\", \"x\"], \n", - " attrs={\"units\": \"kelvin\", \"grid_mapping\": \"spatial_ref\"}\n", - " )\n", - " },\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "linter = xrl.new_linter(recommended=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

<dataset> - ok

\n" - ], - "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.0.1'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', docs_url=None, 
schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=0)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.verify_dataset(ds)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "invalid_ds = ds.copy()\n", - "invalid_ds.attrs = {}\n", - "invalid_ds.sst.attrs[\"units\"] = 1\n", - "invalid_ds[\"sst_avg\"] = xr.DataArray(\n", - " np.random.random((nx, ny)), \n", - " dims=[\"x\", \"y\"], \n", - " attrs={\"units\": \"kelvin\"}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

<dataset>:

\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
dataset warnMissing 'title' attribute in dataset. dataset-title-attr
dataset.attrs warnMissing metadata, attributes are empty. no-empty-attrs
dataset.data_vars['sst']warnInvalid 'units' attribute in variable 'sst'.var-units-attr

3 warnings

\n" - ], - "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.0.1'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', docs_url=None, 
schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='dataset-title-attr', severity=1, fatal=None, fix=None, suggestions=None), Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='var-units-attr', severity=1, fatal=None, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=3)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.verify_dataset(invalid_ds)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pass the configuration of rules via `rules`, which maps rule names to rule configurations.\n", - "A rule configuration is either a _severity_, or a list where the first element is a rule \n", - "_severity_ and subsequent elements are rule arguments: \n", - "\n", - "- _severity_\n", - "- `[`_severity_`]`\n", - "- `[`_severity_`,` _arg-1 | kwargs_ `]`\n", - "- `[`_severity_`,` _arg-1_`,` _arg-2_`,` ...`,` _arg-n | 
kwargs_`]`\n", - "\n", - "Here, _severity_ is either a\n", - "\n", - "- one of `\"error\"`, `\"warn\"`, `\"off\"` or \n", - "- one of `2` (error), `1` (warn), `0` (off)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "linter = xrl.new_linter(\n", - " rules={\n", - " \"no-empty-attrs\": \"warn\",\n", - " \"dataset-title-attr\": \"warn\",\n", - " \"grid-mappings\": \"error\",\n", - " \"var-units-attr\": \"error\",\n", - " \"xcube/cube-dims-order\": \"off\",\n", - " \"xcube/single-grid-mapping\": \"error\",\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

<dataset>:

\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
dataset.attrs warn Missing metadata, attributes are empty. no-empty-attrs
dataset warn Missing 'title' attribute in dataset. dataset-title-attr
dataset.data_vars['sst']errorInvalid 'units' attribute in variable 'sst'.var-units-attr
errorunknown plugin 'xcube' xcube/cube-dims-order
errorunknown plugin 'xcube' xcube/single-grid-mapping

5 problems (3 errors and 2 warnings)

\n" - ], - "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.0.1'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', docs_url=None, 
schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/cube-dims-order': RuleConfig(severity=0, args=(), kwargs={}), 'xcube/single-grid-mapping': RuleConfig(severity=2, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='dataset-title-attr', severity=1, fatal=None, fix=None, suggestions=None), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='var-units-attr', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"unknown plugin 'xcube'\", node_path=None, rule_id='xcube/cube-dims-order', severity=2, fatal=True, fix=None, suggestions=None), Message(message=\"unknown plugin 'xcube'\", node_path=None, rule_id='xcube/single-grid-mapping', severity=2, fatal=True, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=3, fatal_error_count=2, warning_count=2)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.verify_dataset(invalid_ds)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "### Configure Plugins" - ] - 
}, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from xrlint.plugins.core import export_plugin \n", - "\n", - "core_plugin = export_plugin()\n", - "\n", - "linter = xrl.Linter(\n", - " plugins={\n", - " \"humpty-dumpty\": core_plugin\n", - " }, \n", - " rules={\n", - " \"humpty-dumpty/no-empty-attrs\": \"warn\",\n", - " \"humpty-dumpty/dataset-title-attr\": \"error\",\n", - " \"humpty-dumpty/var-units-attr\": \"warn\"\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "

<dataset>:

\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
dataset.attrs warn Missing metadata, attributes are empty. humpty-dumpty/no-empty-attrs
dataset errorMissing 'title' attribute in dataset. humpty-dumpty/dataset-title-attr
dataset.data_vars['sst']warn Invalid 'units' attribute in variable 'sst'.humpty-dumpty/var-units-attr

3 problems (one error and 2 warnings)

\n" - ], - "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'humpty-dumpty': Plugin(meta=PluginMeta(name='__core__', version='0.0.1'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', 
docs_url=None, schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'humpty-dumpty/no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'humpty-dumpty/dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'humpty-dumpty/var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='humpty-dumpty/no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='humpty-dumpty/dataset-title-attr', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='humpty-dumpty/var-units-attr', severity=1, fatal=None, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=1, fatal_error_count=0, warning_count=2)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.verify_dataset(invalid_ds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "### XRLint objects" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By default, a `Linter` has no configuration." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "linter = xrl.Linter()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.config.plugins is None" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.config.rules is None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `new_linter()` function returns a `Linter` pre-configured with builtin plugins and their recommended rules." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "linter = xrl.new_linter()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['__core__']" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(linter.config.plugins.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "linter.config.rules" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If the `new_linter()` function is called with `recommended=False` it still has the builtin plugins, but without any rule configurations." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "linter = xrl.new_linter(recommended=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['__core__']" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(linter.config.plugins.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linter.config.rules is None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "### XRLint CLI" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Usage: xrlint [OPTIONS] [FILES]...\n", - "\n", - " Validate the given dataset FILES.\n", - "\n", - " Reads configuration from `xrlint.config.*` if file exists and unless `--no-\n", - " default-config` is set or `--config PATH` is provided. Then validates each\n", - " dataset in FILES against the configuration. The validation result is dumped\n", - " to standard output if not otherwise stated by `--output-file PATH`. The\n", - " output format is `simple`. Other inbuilt formats are `json` and `html` which\n", - " can by setting the `--format NAME` option.\n", - "\n", - "Options:\n", - " --no-default-config Disable use of default configuration from\n", - " xrlint_config.*\n", - " -c, --config PATH Use this configuration, overriding xrlint_config.*\n", - " config options if present\n", - " --plugin MODULE Specify plugins. MODULE is the name of Python module\n", - " that defines an 'export_plugin()' function.\n", - " --rule SPEC Specify rules. 
SPEC must have format ':\n", - " ' (note the space character).\n", - " -f, --format NAME Use a specific output format - default: simple\n", - " -o, --output-file PATH Specify file to write report to\n", - " --max-warnings COUNT Number of warnings to trigger nonzero exit code -\n", - " default: 5\n", - " --init Write initial configuration file and exit.\n", - " --version Show the version and exit.\n", - " --help Show this message and exit.\n" - ] - } - ], - "source": [ - "!xrlint --help" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'C:\\\\Users\\\\norma\\\\Projects\\\\xrlint\\\\notebooks'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import os\n", - "os.getcwd()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.to_zarr(\"valid.zarr\", mode=\"w\")" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "invalid_ds.to_zarr(\"invalid.zarr\", mode=\"w\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: no configuration file found.\n", - "\n", - "valid.zarr:\n", - " \u001b[3;31merror\u001b[0m No rules configured or applicable. \u001b[2;34m\u001b]8;;https://bcdev.github.io/xrlint\u001b\\https://bcdev.github.io/xrlint\u001b]8;;\u001b\\\u001b[0m\n", - "\n", - "invalid.zarr:\n", - " \u001b[3;31merror\u001b[0m No rules configured or applicable. 
\u001b[2;34m\u001b]8;;https://bcdev.github.io/xrlint\u001b\\https://bcdev.github.io/xrlint\u001b]8;;\u001b\\\u001b[0m\n", - "\n", - "2 errors\n", - "\n" - ] - } - ], - "source": [ - "!xrlint valid.zarr invalid.zarr" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: no configuration file found.\n", - "
\n", - "

Results

\n", - "
\n", - "

valid.zarr:

\n", - "\n", - "\n", - "\n", - "\n", - "
errorNo rules configured or applicable.

one error

\n", - "
\n", - "
\n", - "
\n", - "

invalid.zarr:

\n", - "\n", - "\n", - "\n", - "\n", - "
errorNo rules configured or applicable.

one error

\n", - "
\n", - "
\n", - "\n" - ] - } - ], - "source": [ - "!xrlint valid.zarr invalid.zarr -f html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/xrlint-cli.ipynb b/notebooks/xrlint-cli.ipynb new file mode 100644 index 0000000..5b471c9 --- /dev/null +++ b/notebooks/xrlint-cli.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XRLint CLI\n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Usage: xrlint [OPTIONS] [FILES]...\n", + "\n", + " Validate the given dataset FILES.\n", + "\n", + " Reads configuration from `./xrlint_config.*` if such file exists and unless\n", + " `--no_config_lookup` is set or `--config` is provided. Then validates each\n", + " dataset in FILES against the configuration. The default dataset patters are\n", + " `**/*.zarr` and `**/.nc`. FILES may comprise also directories. If a\n", + " directory is not matched by any file pattern, it will be traversed\n", + " recursively. The validation result is dumped to standard output if not\n", + " otherwise stated by `--output-file`. The output format is `simple` by\n", + " default. 
Other inbuilt formats are `json` and `html` which you can specify\n", + " using the `--format` option.\n", + "\n", + "Options:\n", + " --no-config-lookup Disable use of default configuration from\n", + " xrlint_config.*\n", + " -c, --config FILE Use this configuration, overriding xrlint_config.*\n", + " config options if present\n", + " --print-config FILE Print the configuration for the given file\n", + " --plugin MODULE Specify plugins. MODULE is the name of Python module\n", + " that defines an 'export_plugin()' function.\n", + " --rule SPEC Specify rules. SPEC must have format ':\n", + " ' (note the space character).\n", + " -o, --output-file FILE Specify file to write report to\n", + " -f, --format NAME Use a specific output format - default: simple\n", + " --color / --no-color Force enabling/disabling of color\n", + " --max-warnings COUNT Number of warnings to trigger nonzero exit code -\n", + " default: 5\n", + " --init Write initial configuration file and exit.\n", + " --version Show the version and exit.\n", + " --help Show this message and exit.\n" + ] + } + ], + "source": [ + "!xrlint --help" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'C:\\\\Users\\\\norma\\\\Projects\\\\xrlint\\\\notebooks'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error: file xrlint_config.yaml already exists.\n" + ] + } + ], + "source": [ + "!xrlint --init" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from mkdataset import make_dataset, 
make_dataset_with_issues\n", + "\n", + "make_dataset().to_zarr(\"valid.zarr\", mode=\"w\")\n", + "make_dataset_with_issues().to_zarr(\"invalid.zarr\", mode=\"w\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "valid.zarr - ok\n", + "\n", + "invalid.zarr:\n", + "dataset warn Missing 'title' attribute in dataset. dataset-title-attr\n", + "dataset.attrs warn Missing metadata, attributes are empty. no-empty-attrs\n", + "dataset.data_vars['sst'] warn Invalid 'units' attribute in variable 'sst'. var-units-attr\n", + "\n", + "3 warnings\n", + "\n" + ] + } + ], + "source": [ + "!xrlint --no-color valid.zarr invalid.zarr" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "
\n", + "

Results

\n", + "
\n", + "

valid.zarr - ok

\n", + "
\n", + "
\n", + "
\n", + "

invalid.zarr:

\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
dataset warnMissing 'title' attribute in dataset. dataset-title-attr
dataset.attrs warnMissing metadata, attributes are empty. no-empty-attrs
dataset.data_vars['sst']warnInvalid 'units' attribute in variable 'sst'.var-units-attr

3 warnings

\n", + "
\n", + "
\n", + "\n" + ] + } + ], + "source": [ + "!xrlint valid.zarr invalid.zarr -f html" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"name\": \"\",\n", + " \"plugins\": {\n", + " \"__core__\": \"xrlint.plugins.core\"\n", + " },\n", + " \"rules\": {\n", + " \"coords-for-dims\": 2,\n", + " \"dataset-title-attr\": 1,\n", + " \"grid-mappings\": 2,\n", + " \"no-empty-attrs\": 1,\n", + " \"var-units-attr\": 1\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "!xrlint --print-config valid.zarr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/xrlint-linter.ipynb b/notebooks/xrlint-linter.ipynb new file mode 100644 index 0000000..2e4ba44 --- /dev/null +++ b/notebooks/xrlint-linter.ipynb @@ -0,0 +1,1352 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import xarray as xr\n", + "import xrlint.all as xrl" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# XRLint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Basic API Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.1.0.dev0'" + ] + }, + "execution_count": 2, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "xrl.version" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 464B\n",
+       "Dimensions:      (x: 2, y: 3, time: 4)\n",
+       "Coordinates:\n",
+       "  * x            (x) float64 16B -180.0 180.0\n",
+       "  * y            (y) float64 24B -90.0 0.0 90.0\n",
+       "  * time         (time) int64 32B 2010 2011 2012 2013\n",
+       "    spatial_ref  int64 8B 0\n",
+       "Data variables:\n",
+       "    sst          (time, y, x) float64 192B 0.6358 0.8142 ... 0.7259 0.7808\n",
+       "    sst_anomaly  (time, y, x) float64 192B 0.6082 0.7162 ... 0.8605 0.3188\n",
+       "Attributes:\n",
+       "    title:    SST-Climatology Subset
" + ], + "text/plain": [ + " Size: 464B\n", + "Dimensions: (x: 2, y: 3, time: 4)\n", + "Coordinates:\n", + " * x (x) float64 16B -180.0 180.0\n", + " * y (y) float64 24B -90.0 0.0 90.0\n", + " * time (time) int64 32B 2010 2011 2012 2013\n", + " spatial_ref int64 8B 0\n", + "Data variables:\n", + " sst (time, y, x) float64 192B 0.6358 0.8142 ... 0.7259 0.7808\n", + " sst_anomaly (time, y, x) float64 192B 0.6082 0.7162 ... 0.8605 0.3188\n", + "Attributes:\n", + " title: SST-Climatology Subset" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from mkdataset import make_dataset\n", + "ds = make_dataset()\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "linter = xrl.new_linter(\"recommended\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

<dataset> - ok

\n" + ], + "text/plain": [ + "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.1.0.dev0', module='xrlint.plugins.core'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that 
describes it.', docs_url=None, schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=0)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.verify_dataset(ds)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 512B\n",
+       "Dimensions:      (x: 2, y: 3, time: 4)\n",
+       "Coordinates:\n",
+       "  * x            (x) float64 16B -180.0 180.0\n",
+       "  * y            (y) float64 24B -90.0 0.0 90.0\n",
+       "  * time         (time) int64 32B 2010 2011 2012 2013\n",
+       "    spatial_ref  int64 8B 0\n",
+       "Data variables:\n",
+       "    sst          (time, y, x) float64 192B 0.9772 0.2759 ... 0.08963 0.3726\n",
+       "    sst_anomaly  (time, y, x) float64 192B 0.9646 0.2598 ... 0.4992 0.4611\n",
+       "    sst_avg      (x, y) float64 48B 0.7927 0.9416 0.3733 0.389 0.596 0.02801
" + ], + "text/plain": [ + " Size: 512B\n", + "Dimensions: (x: 2, y: 3, time: 4)\n", + "Coordinates:\n", + " * x (x) float64 16B -180.0 180.0\n", + " * y (y) float64 24B -90.0 0.0 90.0\n", + " * time (time) int64 32B 2010 2011 2012 2013\n", + " spatial_ref int64 8B 0\n", + "Data variables:\n", + " sst (time, y, x) float64 192B 0.9772 0.2759 ... 0.08963 0.3726\n", + " sst_anomaly (time, y, x) float64 192B 0.9646 0.2598 ... 0.4992 0.4611\n", + " sst_avg (x, y) float64 48B 0.7927 0.9416 0.3733 0.389 0.596 0.02801" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from mkdataset import make_dataset_with_issues\n", + "\n", + "invalid_ds = make_dataset_with_issues()\n", + "invalid_ds" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

<dataset>:

\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
dataset warnMissing 'title' attribute in dataset. dataset-title-attr
dataset.attrs warnMissing metadata, attributes are empty. no-empty-attrs
dataset.data_vars['sst']warnInvalid 'units' attribute in variable 'sst'.var-units-attr

3 warnings

\n" + ], + "text/plain": [ + "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.1.0.dev0', module='xrlint.plugins.core'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that 
describes it.', docs_url=None, schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='dataset-title-attr', severity=1, fatal=None, fix=None, suggestions=None), Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='var-units-attr', severity=1, fatal=None, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=3)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.verify_dataset(invalid_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pass the configuration of rules via `rules`, which maps rule names to rule configurations.\n", + "A rule configuration is either a _severity_, or a list where the first element is a rule \n", + "_severity_ and subsequent elements are rule arguments: \n", + "\n", + "- _severity_\n", + "- `[`_severity_`]`\n", + "- `[`_severity_`,` _arg-1 | kwargs_ `]`\n", + "- `[`_severity_`,` _arg-1_`,` 
_arg-2_`,` ...`,` _arg-n | kwargs_`]`\n", + "\n", + "Here, _severity_ is either\n", + "\n", + "- one of `\"error\"`, `\"warn\"`, `\"off\"` or \n", + "- one of `2` (error), `1` (warn), `0` (off)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from xrlint.plugins.xcube import export_plugin \n", + "\n", + "xcube_plugin = export_plugin()\n", + "xcube_recommended = xcube_plugin.configs[\"recommended\"]\n", + "\n", + "linter = xrl.new_linter(\n", + " \"recommended\",\n", + " plugins={\"xcube\": xcube_plugin},\n", + " rules={\n", + " \"no-empty-attrs\": \"warn\",\n", + " \"dataset-title-attr\": \"warn\",\n", + " \"grid-mappings\": \"error\",\n", + " \"var-units-attr\": \"error\",\n", + " **xcube_recommended.rules\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

<dataset>:

\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
dataset warn Missing 'title' attribute in dataset. dataset-title-attr
dataset.attrs warn Missing metadata, attributes are empty. no-empty-attrs
dataset.data_vars['sst'] errorInvalid 'units' attribute in variable 'sst'. var-units-attr
dataset.data_vars['sst_avg'] errorOrder of dimensions should be y,x, but found x,y.xcube/cube-dims-order
dataset.data_vars['sst'] warn Missing attribute 'color_bar_name' xcube/data-var-colors
dataset.data_vars['sst_anomaly']warn Missing attribute 'color_bar_name' xcube/data-var-colors
dataset.data_vars['sst_avg'] warn Missing attribute 'color_bar_name' xcube/data-var-colors

7 problems (2 errors and 5 warnings)

\n" + ], + "text/plain": [ + "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.1.0.dev0', module='xrlint.plugins.core'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that 
describes it.', docs_url=None, schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={}), 'xcube': Plugin(meta=PluginMeta(name='xcube', version='0.1.0.dev0', module='xrlint.plugins.xcube'), configs={'recommended': Config(name='xcube-recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'xcube': ...}, rules={'xcube/any-spatial-data-var': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/cube-dims-order': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/data-var-colors': RuleConfig(severity=1, args=(), kwargs={}), 'xcube/grid-mapping-naming': RuleConfig(severity=1, args=(), kwargs={}), 'xcube/increasing-time': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/lat-lon-naming': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/single-grid-mapping': RuleConfig(severity=2, args=(), kwargs={})}, settings=None), 'all': Config(name='xcube-all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'xcube': ...}, rules={'xcube/any-spatial-data-var': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/cube-dims-order': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/data-var-colors': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/grid-mapping-naming': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/increasing-time': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/lat-lon-naming': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/single-grid-mapping': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'any-spatial-data-var': Rule(meta=RuleMeta(name='any-spatial-data-var', version='1.0.0', description='A datacube should have spatial data variables.', docs_url=None, schema=None, type='problem'), op_class=), 'cube-dims-order': 
Rule(meta=RuleMeta(name='cube-dims-order', version='1.0.0', description='Order of dimensions in spatio-temporal datacube variables should be [time, ..., y, x].', docs_url=None, schema=None, type='problem'), op_class=), 'data-var-colors': Rule(meta=RuleMeta(name='data-var-colors', version='1.0.0', description='Spatial data variables should encode xcube color mappings in their metadata.', docs_url='https://xcube.readthedocs.io/en/latest/cubespec.html#encoding-of-colors', schema=None, type='suggestion'), op_class=), 'grid-mapping-naming': Rule(meta=RuleMeta(name='grid-mapping-naming', version='1.0.0', description=\"Grid mapping variables should be called 'spatial_ref' or 'crs' for compatibility with rioxarray and other packages.\", docs_url=None, schema=None, type='suggestion'), op_class=), 'increasing-time': Rule(meta=RuleMeta(name='increasing-time', version='1.0.0', description='Time coordinate labels should be monotonically increasing.', docs_url=None, schema=None, type='problem'), op_class=), 'lat-lon-naming': Rule(meta=RuleMeta(name='lat-lon-naming', version='1.0.0', description=\"Latitude and longitude coordinates and dimensions should be called 'lat' and 'lon'.\", docs_url=None, schema=None, type='problem'), op_class=), 'single-grid-mapping': Rule(meta=RuleMeta(name='single-grid-mapping', version='1.0.0', description='A single grid mapping shall be used for all spatial data variables of a datacube.', docs_url=None, schema=None, type='problem'), op_class=)}, processors={})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=[], kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=[], kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=[], kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/any-spatial-data-var': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/cube-dims-order': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/data-var-colors': 
RuleConfig(severity=1, args=(), kwargs={}), 'xcube/grid-mapping-naming': RuleConfig(severity=1, args=(), kwargs={}), 'xcube/increasing-time': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/lat-lon-naming': RuleConfig(severity=2, args=(), kwargs={}), 'xcube/single-grid-mapping': RuleConfig(severity=2, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='dataset-title-attr', severity=1, fatal=None, fix=None, suggestions=None), Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='var-units-attr', severity=2, fatal=None, fix=None, suggestions=None), Message(message='Order of dimensions should be y,x, but found x,y.', node_path=\"dataset.data_vars['sst_avg']\", rule_id='xcube/cube-dims-order', severity=2, fatal=None, fix=None, suggestions=[Suggestion(desc='Use xarray.transpose(...) 
to reorder dimensions.', data=None, fix=None)]), Message(message=\"Missing attribute 'color_bar_name'\", node_path=\"dataset.data_vars['sst']\", rule_id='xcube/data-var-colors', severity=1, fatal=None, fix=None, suggestions=None), Message(message=\"Missing attribute 'color_bar_name'\", node_path=\"dataset.data_vars['sst_anomaly']\", rule_id='xcube/data-var-colors', severity=1, fatal=None, fix=None, suggestions=None), Message(message=\"Missing attribute 'color_bar_name'\", node_path=\"dataset.data_vars['sst_avg']\", rule_id='xcube/data-var-colors', severity=1, fatal=None, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=2, fatal_error_count=0, warning_count=5)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.verify_dataset(invalid_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### Configure Plugins" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "from xrlint.plugins.core import export_plugin \n", + "\n", + "core_plugin = export_plugin()\n", + "\n", + "linter = xrl.Linter(\n", + " plugins={\n", + " \"humpty-dumpty\": core_plugin\n", + " }, \n", + " rules={\n", + " \"humpty-dumpty/no-empty-attrs\": \"warn\",\n", + " \"humpty-dumpty/dataset-title-attr\": \"error\",\n", + " \"humpty-dumpty/var-units-attr\": \"warn\"\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

<dataset>:

\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
dataset.attrs warn Missing metadata, attributes are empty. humpty-dumpty/no-empty-attrs
dataset errorMissing 'title' attribute in dataset. humpty-dumpty/dataset-title-attr
dataset.data_vars['sst']warn Invalid 'units' attribute in variable 'sst'.humpty-dumpty/var-units-attr

3 problems (one error and 2 warnings)

\n" + ], + "text/plain": [ + "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'humpty-dumpty': Plugin(meta=PluginMeta(name='__core__', version='0.1.0.dev0', module='xrlint.plugins.core'), configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)}, rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', docs_url=None, schema=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', docs_url=None, schema=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', docs_url=None, schema=None, type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata 
that describes it.', docs_url=None, schema=None, type='suggestion'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", docs_url=None, schema=None, type='suggestion'), op_class=)}, processors={})}, rules={'humpty-dumpty/no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'humpty-dumpty/dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'humpty-dumpty/var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='humpty-dumpty/no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='humpty-dumpty/dataset-title-attr', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"Invalid 'units' attribute in variable 'sst'.\", node_path=\"dataset.data_vars['sst']\", rule_id='humpty-dumpty/var-units-attr', severity=1, fatal=None, fix=None, suggestions=None)], fixable_error_count=0, fixable_warning_count=0, error_count=1, fatal_error_count=0, warning_count=2)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.verify_dataset(invalid_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "### XRLint objects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, a `Linter` has no configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "linter = xrl.Linter()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.config.plugins is None" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.config.rules is None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, the `new_linter()` function returns a `Linter` pre-configured with the builtin plugins, but without any rules configured." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "linter = xrl.new_linter()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__core__']" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(linter.config.plugins.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "linter.config.rules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If the `new_linter()` function is called with `\"recommended\"` it uses recommended rules from the core plugin." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "linter = xrl.new_linter(\"recommended\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__core__']" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(linter.config.plugins.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}),\n", + " 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}),\n", + " 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}),\n", + " 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}),\n", + " 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linter.config.rules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 1e24732..942f12e 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -3,6 +3,7 @@ import shutil from unittest import TestCase +import click.testing from click.testing import 
CliRunner import xarray as xr @@ -17,8 +18,6 @@ dataset-title-attr: error """ -no_match_config_yaml = "[]" - # noinspection PyTypeChecker class CliMainTest(TestCase): @@ -55,23 +54,32 @@ def tearDownClass(cls): os.chdir(cls.last_cwd) shutil.rmtree(cls.temp_dir) - def test_no_files(self): + def xrlint(self, *args: str) -> click.testing.Result: runner = CliRunner() - result = runner.invoke(main) - self.assertIn("No dataset files provided.", result.output) - self.assertEqual(1, result.exit_code) + result = runner.invoke(main, args) + if not isinstance(result.exception, SystemExit): + self.assertIsNone(result.exception) + return result - def test_files_no_rules(self): - runner = CliRunner() - result = runner.invoke(main, self.files) - self.assertIn("Warning: no configuration file found.", result.output) - self.assertIn("No rules configured or applicable.", result.output) + def test_no_files_no_config(self): + result = self.xrlint() + self.assertEqual("", result.output) + self.assertEqual(0, result.exit_code) + + def test_config_no_files(self): + with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): + result = self.xrlint() + self.assertEqual("", result.output) + self.assertEqual(0, result.exit_code) + + def test_files_no_config(self): + result = self.xrlint(*self.files) + self.assertIn("Warning: no configuration file found.\n", result.output) self.assertEqual(1, result.exit_code) def test_files_one_rule(self): with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["--no-color"] + self.files) + result = self.xrlint("--no-color", *self.files) self.assertEqual( "\n" "dataset1.zarr - ok\n\n" @@ -84,16 +92,14 @@ def test_files_one_rule(self): self.assertEqual(0, result.exit_code) with text_file(DEFAULT_CONFIG_FILE_YAML, self.fail_config_yaml): - runner = CliRunner() - result = runner.invoke(main, self.files) + result = self.xrlint(*self.files) self.assertIn("Missing metadata, 
attributes are empty.", result.output) self.assertIn("no-empty-attrs", result.output) self.assertEqual(1, result.exit_code) def test_dir_one_rule(self): with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["--no-color", "."]) + result = self.xrlint("--no-color", ".") prefix = self.temp_dir.replace("\\", "/") self.assertIn(f"{prefix}/dataset1.zarr - ok\n\n", result.output) self.assertIn(f"{prefix}/dataset1.nc - ok\n\n", result.output) @@ -103,16 +109,15 @@ def test_dir_one_rule(self): self.assertEqual(0, result.exit_code) with text_file(DEFAULT_CONFIG_FILE_YAML, self.fail_config_yaml): - runner = CliRunner() - result = runner.invoke(main, self.files) + result = self.xrlint(*self.files) self.assertIn("Missing metadata, attributes are empty.", result.output) self.assertIn("no-empty-attrs", result.output) self.assertEqual(1, result.exit_code) def test_color_no_color(self): with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["--no-color"] + self.files) + result = self.xrlint("--no-color", *self.files) + self.assertIsNone(result.exception) self.assertEqual( "\n" "dataset1.zarr - ok\n\n" @@ -138,30 +143,18 @@ def test_color_no_color(self): self.assertEqual(0, result.exit_code) def test_files_with_rule_option(self): - runner = CliRunner() - result = runner.invoke( - main, - [ - "--rule", - "no-empty-attrs: error", - ] - + self.files, - ) + result = self.xrlint("--rule", "no-empty-attrs: error", *self.files) self.assertIn("Missing metadata, attributes are empty.", result.output) self.assertIn("no-empty-attrs", result.output) self.assertEqual(1, result.exit_code) def test_files_with_plugin_and_rule_options(self): - runner = CliRunner() - result = runner.invoke( - main, - [ - "--plugin", - "xrlint.plugins.xcube", - "--rule", - "xcube/any-spatial-data-var: error", - ] - + self.files, + result = self.xrlint( + "--plugin", + "xrlint.plugins.xcube", + 
"--rule", + "xcube/any-spatial-data-var: error", + *self.files, ) self.assertIn("No spatial data variables found.", result.output) self.assertIn("xcube/any-spatial-data-var", result.output) @@ -169,35 +162,49 @@ def test_files_with_plugin_and_rule_options(self): def test_files_with_output_file(self): with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["-o", "memory://report.txt"] + self.files) + result = self.xrlint("-o", "memory://report.txt", *self.files) self.assertEqual("", result.output) self.assertEqual(0, result.exit_code) def test_files_but_config_file_missing(self): - runner = CliRunner() - result = runner.invoke(main, ["-c", "pippo.py"] + self.files) + result = self.xrlint("-c", "pippo.py", *self.files) self.assertIn("Error: file not found: pippo.py", result.output) self.assertEqual(1, result.exit_code) def test_files_with_format_option(self): with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["-f", "json"] + self.files) + result = self.xrlint("-f", "json", *self.files) self.assertIn('"results": [\n', result.output) self.assertEqual(0, result.exit_code) def test_file_does_not_match(self): with text_file(DEFAULT_CONFIG_FILE_YAML, no_match_config_yaml): - runner = CliRunner() - result = runner.invoke(main, ["test.zarr"]) + result = self.xrlint("test.zarr") # TODO: make this assertion work # self.assertIn("No configuration matches this file.", result.output) self.assertEqual(1, result.exit_code) + def test_print_config_option(self): + with text_file(DEFAULT_CONFIG_FILE_YAML, self.ok_config_yaml): + result = self.xrlint("--print-config", "dataset2.zarr") + self.assertEqual( + ( + "{\n" + ' "name": "",\n' + ' "plugins": {\n' + ' "__core__": "xrlint.plugins.core"\n' + " },\n" + ' "rules": {\n' + ' "dataset-title-attr": 2\n' + " }\n" + "}\n" + ), + result.output, + ) + self.assertEqual(0, result.exit_code) + def 
test_files_with_invalid_format_option(self): - runner = CliRunner() - result = runner.invoke(main, ["-f", "foo"] + self.files) + result = self.xrlint("-f", "foo", *self.files) self.assertIn( "Error: unknown format 'foo'. The available formats are '", result.output ) @@ -208,8 +215,7 @@ def test_init(self): exists = os.path.exists(config_file) self.assertFalse(exists) try: - runner = CliRunner() - result = runner.invoke(main, ["--init"]) + result = self.xrlint("--init") self.assertEqual( f"Configuration template written to {config_file}\n", result.output ) @@ -225,8 +231,7 @@ def test_init_exists(self): exists = os.path.exists(config_file) self.assertFalse(exists) with text_file(config_file, ""): - runner = CliRunner() - result = runner.invoke(main, ["--init"]) + result = self.xrlint("--init") self.assertEqual( f"Error: file {config_file} already exists.\n", result.output ) diff --git a/tests/formatters/helpers.py b/tests/formatters/helpers.py index 1b6174d..2b47886 100644 --- a/tests/formatters/helpers.py +++ b/tests/formatters/helpers.py @@ -1,11 +1,31 @@ from xrlint.config import Config +from xrlint.formatter import FormatterContext from xrlint.plugin import Plugin from xrlint.plugin import PluginMeta -from xrlint.result import Message +from xrlint.result import Message, ResultStats from xrlint.result import Result from xrlint.rule import RuleOp +class FormatterContextImpl(FormatterContext): + + def __init__(self, max_warnings: int = -1): + self._max_warnings = max_warnings + self._result_stats = ResultStats() + + @property + def max_warnings_exceeded(self) -> bool: + return self._result_stats.warning_count > self._max_warnings + + @property + def result_stats(self) -> ResultStats: + return self._result_stats + + +def get_context(max_warnings: int = -1) -> FormatterContext: + return FormatterContextImpl(max_warnings) + + def get_test_results(): plugin = Plugin(meta=PluginMeta(name="test")) diff --git a/tests/formatters/test_html.py b/tests/formatters/test_html.py 
index 1659f51..3834edf 100644 --- a/tests/formatters/test_html.py +++ b/tests/formatters/test_html.py @@ -1,8 +1,7 @@ from unittest import TestCase -from xrlint.formatter import FormatterContext from xrlint.formatters.html import Html -from .helpers import get_test_results +from .helpers import get_test_results, get_context class HtmlTest(TestCase): @@ -10,7 +9,7 @@ def test_html(self): results = get_test_results() formatter = Html() text = formatter.format( - context=FormatterContext(), + context=get_context(), results=results, ) self.assertIsInstance(text, str) @@ -20,7 +19,7 @@ def test_html_with_meta(self): results = get_test_results() formatter = Html(with_meta=True) text = formatter.format( - context=FormatterContext(), + context=get_context(), results=results, ) self.assertIsInstance(text, str) diff --git a/tests/formatters/test_json.py b/tests/formatters/test_json.py index bdd4209..9477249 100644 --- a/tests/formatters/test_json.py +++ b/tests/formatters/test_json.py @@ -1,8 +1,7 @@ from unittest import TestCase -from xrlint.formatter import FormatterContext from xrlint.formatters.json import Json -from .helpers import get_test_results +from .helpers import get_test_results, get_context class JsonTest(TestCase): @@ -10,7 +9,7 @@ def test_json(self): results = get_test_results() formatter = Json() text = formatter.format( - context=FormatterContext(), + context=get_context(), results=results, ) self.assertIn('"results": [', text) @@ -19,7 +18,7 @@ def test_json_with_meta(self): results = get_test_results() formatter = Json(with_meta=True) text = formatter.format( - context=FormatterContext(), + context=get_context(), results=results, ) self.assertIn('"results": [', text) diff --git a/tests/formatters/test_markdown.py b/tests/formatters/test_markdown.py index 5efd8f8..62949a3 100644 --- a/tests/formatters/test_markdown.py +++ b/tests/formatters/test_markdown.py @@ -2,9 +2,8 @@ import pytest -from xrlint.formatter import FormatterContext from 
xrlint.formatters.markdown import Markdown -from .helpers import get_test_results +from .helpers import get_test_results, get_context class MarkdownTest(TestCase): @@ -13,6 +12,6 @@ def test_markdown(self): formatter = Markdown() with pytest.raises(NotImplementedError): formatter.format( - context=FormatterContext(), + context=get_context(), results=get_test_results(), ) diff --git a/tests/formatters/test_simple.py b/tests/formatters/test_simple.py index d67a7f8..f5206b7 100644 --- a/tests/formatters/test_simple.py +++ b/tests/formatters/test_simple.py @@ -1,7 +1,7 @@ from unittest import TestCase +from tests.formatters.helpers import get_context from xrlint.config import Config -from xrlint.formatter import FormatterContext from xrlint.formatters.simple import Simple from xrlint.result import Message from xrlint.result import Result @@ -23,7 +23,7 @@ class SimpleTest(TestCase): def test_no_color(self): formatter = Simple(styled=False) text = formatter.format( - context=FormatterContext(), + context=get_context(), results=self.results, ) self.assert_output_ok(text) @@ -32,7 +32,7 @@ def test_no_color(self): def test_color(self): formatter = Simple(styled=True) text = formatter.format( - context=FormatterContext(), + context=get_context(), results=self.results, ) self.assert_output_ok(text) diff --git a/tests/test_linter.py b/tests/test_linter.py index 65e7e8a..029ff84 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -37,13 +37,20 @@ def test_new_linter(self): self.assertEqual({CORE_PLUGIN_NAME}, set(linter.config.plugins.keys())) self.assertEqual(None, linter.config.rules) - linter = new_linter(recommended=False) + linter = new_linter(config_name=None) self.assertIsInstance(linter, xrl.Linter) self.assertIsInstance(linter.config.plugins, dict) self.assertEqual({CORE_PLUGIN_NAME}, set(linter.config.plugins.keys())) self.assertEqual(None, linter.config.rules) - linter = new_linter(recommended=True) + linter = new_linter("recommended") + 
self.assertIsInstance(linter, xrl.Linter) + self.assertIsInstance(linter.config.plugins, dict) + self.assertEqual({CORE_PLUGIN_NAME}, set(linter.config.plugins.keys())) + self.assertIsInstance(linter.config.rules, dict) + self.assertIn("coords-for-dims", linter.config.rules) + + linter = new_linter("all") self.assertIsInstance(linter, xrl.Linter) self.assertIsInstance(linter.config.plugins, dict) self.assertEqual({CORE_PLUGIN_NAME}, set(linter.config.plugins.keys())) diff --git a/tests/test_result.py b/tests/test_result.py index 84d491d..5192791 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -2,7 +2,13 @@ from xrlint.config import Config from xrlint.plugin import Plugin, PluginMeta -from xrlint.result import get_rules_meta_for_results, Result, Message, Suggestion +from xrlint.result import ( + get_rules_meta_for_results, + Result, + Message, + Suggestion, + ResultStats, +) from xrlint.rule import RuleOp, RuleMeta @@ -89,3 +95,40 @@ def test_from_value(self): suggestion = Suggestion("Use xr.transpose()") self.assertIs(suggestion, Suggestion.from_value(suggestion)) + + +class ResultStatsTest(TestCase): + def test_collect(self): + stats = ResultStats() + + self.assertEqual(0, stats.error_count) + self.assertEqual(0, stats.warning_count) + self.assertEqual(0, stats.result_count) + + results = [ + Result.new( + messages=[ + Message("R1 M1", severity=1), + Message("R1 M2", severity=2), + ] + ), + Result.new( + messages=[ + Message("R2 M1", severity=2), + ] + ), + Result.new( + messages=[ + Message("R3 M1", severity=1), + Message("R3 M2", severity=2), + Message("R3 M3", severity=2), + ] + ), + ] + + results2 = list(stats.collect(results)) + + self.assertEqual(results, results2) + self.assertEqual(4, stats.error_count) + self.assertEqual(2, stats.warning_count) + self.assertEqual(3, stats.result_count) diff --git a/xrlint/all.py b/xrlint/all.py index 2f29f03..653a1f0 100644 --- a/xrlint/all.py +++ b/xrlint/all.py @@ -1,4 +1,4 @@ -from xrlint.cli.engine 
import CliEngine +from xrlint.cli.engine import XRLint from xrlint.config import Config from xrlint.config import ConfigList from xrlint.formatter import Formatter @@ -34,7 +34,7 @@ from xrlint.version import version __all__ = [ - "CliEngine", + "XRLint", "Config", "ConfigList", "Linter", diff --git a/xrlint/cli/engine.py b/xrlint/cli/engine.py index d281d33..5a110bb 100644 --- a/xrlint/cli/engine.py +++ b/xrlint/cli/engine.py @@ -1,6 +1,6 @@ +from collections.abc import Iterable, Iterator +import json import os -from collections.abc import Iterable -from typing import Iterator import click import fsspec @@ -11,8 +11,10 @@ from xrlint.cli.constants import DEFAULT_CONFIG_FILE_YAML from xrlint.cli.constants import DEFAULT_GLOBAL_FILES from xrlint.cli.constants import DEFAULT_GLOBAL_IGNORES -from xrlint.cli.constants import INIT_CONFIG_YAML from xrlint.cli.constants import DEFAULT_OUTPUT_FORMAT +from xrlint.cli.constants import DEFAULT_MAX_WARNINGS +from xrlint.cli.constants import INIT_CONFIG_YAML +from xrlint.config import Config from xrlint.config import ConfigList from xrlint.config import get_core_config from xrlint.formatter import FormatterContext @@ -21,6 +23,7 @@ from xrlint.plugin import Plugin from xrlint.result import Message from xrlint.result import Result +from xrlint.result import ResultStats from xrlint.util.filefilter import FileFilter @@ -29,31 +32,52 @@ ) -class CliEngine: +class XRLint(FormatterContext): + """The engine behind the XRLint CLI application. + + The arguments are mostly 1:1 equivalents of the + CLI options. + """ # noinspection PyShadowingBuiltins def __init__( self, - no_default_config: int = False, + no_config_lookup: int = False, config_path: str | None = None, plugin_specs: tuple[str, ...] = (), rule_specs: tuple[str, ...] = (), output_format: str = DEFAULT_OUTPUT_FORMAT, output_path: str | None = None, - styled: bool = True, - files: tuple[str, ...] 
= (), + output_styled: bool = True, + max_warnings: int = DEFAULT_MAX_WARNINGS, ): - self.no_default_config = no_default_config + self.no_config_lookup = no_config_lookup self.config_path = config_path self.plugin_specs = plugin_specs self.rule_specs = rule_specs self.output_format = output_format self.output_path = output_path - self.styled = styled - self.files = files - - def load_config(self) -> ConfigList: - + self.output_styled = output_styled + self.max_warnings = max_warnings + self._result_stats = ResultStats() + self.config_list = ConfigList() + + @property + def max_warnings_exceeded(self) -> bool: + """`True` if the maximum number of warnings has been exceeded.""" + return self._result_stats.warning_count > self.max_warnings + + @property + def result_stats(self) -> ResultStats: + """Get current result statistics.""" + return self._result_stats + + def load_config_list(self) -> None: + """Load configuration list. + The function considers any `plugin` and `rule` + options, the default configuration file names or a specified + configuration file. + """ plugins = {} for plugin_spec in self.plugin_specs: plugin = Plugin.from_value(plugin_spec) @@ -71,7 +95,7 @@ def load_config(self) -> ConfigList: config_list = read_config_list(self.config_path) except (FileNotFoundError, ConfigError) as e: raise click.ClickException(f"{e}") from e - elif not self.no_default_config: + elif not self.no_config_lookup: for config_path in DEFAULT_CONFIG_FILES: try: config_list = read_config_list(config_path) @@ -92,13 +116,44 @@ def load_config(self) -> ConfigList: if rules: configs += [{"rules": rules}] - return ConfigList.from_value(configs) - - def verify_datasets(self, config_list: ConfigList) -> Iterator[Result]: - global_filter = config_list.get_global_filter(default=DEFAULT_GLOBAL_FILTER) + self.config_list = ConfigList.from_value(configs) + + def get_config_for_file(self, file_path: str) -> Config | None: + """Compute configuration for the given file. 
+ + Args: + file_path: A file path. + Return: + A configuration object or `None` if no item + in the configuration list applies. + """ + return self.config_list.compute_config(file_path) + + def print_config_for_file(self, file_path: str) -> None: + """Print computed configuration for the given file. + + Args: + file_path: A file path. + """ + config = self.get_config_for_file(file_path) + config_json_obj = config.to_dict() if config is not None else None + click.echo(json.dumps(config_json_obj, indent=2)) + + def verify_datasets(self, files: Iterable[str]) -> Iterator[Result]: + """Verify given files. + The function produces a validation result for each file. + + Args: + files: Iterable of files. + Return: + Iterator of reports. + """ + global_filter = self.config_list.get_global_filter( + default=DEFAULT_GLOBAL_FILTER + ) linter = Linter() - for file_path, is_dir in get_files(self.files, global_filter): - config = config_list.compute_config(file_path) + for file_path, is_dir in get_files(files, global_filter): + config = self.get_config_for_file(file_path) if config is not None: yield linter.verify_dataset(file_path, config=config) else: @@ -114,6 +169,13 @@ def verify_datasets(self, config_list: ConfigList) -> Iterator[Result]: ) def format_results(self, results: Iterable[Result]) -> str: + """Format the given results. + + Args: + results: Iterable of results. + Return: + A report in plain text. 
+ """ output_format = ( self.output_format if self.output_format else DEFAULT_OUTPUT_FORMAT ) @@ -129,16 +191,17 @@ def format_results(self, results: Iterable[Result]) -> str: # against formatter.meta.schema if output_format == "simple": formatter_kwargs = { - "styled": self.styled and self.output_path is None, + "styled": self.output_styled and self.output_path is None, "output": self.output_path is None, } else: formatter_kwargs = {} # noinspection PyArgumentList formatter_op = formatter.op_class(**formatter_kwargs) - return formatter_op.format(FormatterContext(False), results) + return formatter_op.format(self, self._result_stats.collect(results)) - def write_report(self, report: str): + def write_report(self, report: str) -> None: + """Write the validation report provided as plain text.""" if self.output_path: with fsspec.open(self.output_path, mode="w") as f: f.write(report) @@ -147,7 +210,10 @@ def write_report(self, report: str): print(report) @classmethod - def init_config_file(cls): + def init_config_file(cls) -> None: + """Write an initial configuration file. + The file is written into the current working directory. + """ file_path = DEFAULT_CONFIG_FILE_YAML if os.path.exists(file_path): raise click.ClickException(f"file {file_path} already exists.") @@ -157,8 +223,20 @@ def init_config_file(cls): def get_files( - file_paths: tuple[str, ...], global_filter: FileFilter + file_paths: Iterable[str], global_filter: FileFilter ) -> Iterator[tuple[str, bool | None]]: + """Provide an iterator for the list of files or directories. + Directories in `files` that are not filtered out will be + recursively traversed. + + Args: + file_paths: Iterable of files or directory. + global_filter: A file filter that includes files that + covered by global file patterns and not excluded + by global ignore patterns. + Return: + An iterator of filtered files or directories. 
+ """ for file_path in file_paths: _fs, root = fsspec.url_to_fs(file_path) fs: fsspec.AbstractFileSystem = _fs @@ -167,7 +245,6 @@ def get_files( yield file_path, _dir elif _dir: for path, dirs, files in fs.walk(root): - # print(path, dirs, files) for d in dirs: d_path = f"{path}/{d}" if global_filter.accept(d_path): diff --git a/xrlint/cli/main.py b/xrlint/cli/main.py index e35ff15..fa76911 100644 --- a/xrlint/cli/main.py +++ b/xrlint/cli/main.py @@ -2,7 +2,6 @@ import click -from xrlint.cli.stats import Stats # Warning: do not import heavy stuff here, it can # slow down commands like "xrlint --help" otherwise. @@ -16,8 +15,8 @@ @click.command(name="xrlint") @click.option( - "--no-default-config", - "no_default_config", + "--no-config-lookup", + "no_config_lookup", help=f"Disable use of default configuration from {DEFAULT_CONFIG_BASENAME}.*", is_flag=True, ) @@ -29,7 +28,13 @@ f"Use this configuration, overriding {DEFAULT_CONFIG_BASENAME}.*" f" config options if present" ), - metavar="PATH", + metavar="FILE", +) +@click.option( + "--print-config", + "inspect_path", + help="Print the configuration for the given file", + metavar="FILE", ) @click.option( "--plugin", @@ -56,7 +61,7 @@ "--output-file", "output_file", help="Specify file to write report to", - metavar="PATH", + metavar="FILE", ) @click.option( "-f", @@ -93,8 +98,9 @@ @click.version_option(version) @click.help_option() def main( - no_default_config: bool, + no_config_lookup: bool, config_path: str | None, + inspect_path: str | None, plugin_specs: tuple[str, ...], rule_specs: tuple[str, ...], max_warnings: int, @@ -106,47 +112,53 @@ def main( ): """Validate the given dataset FILES. - Reads configuration from `xrlint.config.*` if file exists and - unless `--no-default-config` is set or `--config PATH` is provided. + Reads configuration from `./xrlint_config.*` if such file + exists and unless `--no_config_lookup` is set or `--config` is + provided. 
Then validates each dataset in FILES against the configuration. + The default dataset patters are `**/*.zarr` and `**/.nc`. + FILES may comprise also directories. If a directory is not matched + by any file pattern, it will be traversed recursively. The validation result is dumped to standard output if not otherwise - stated by `--output-file PATH`. The output format is `simple`. Other - inbuilt formats are `json` and `html` which can by setting the - `--format NAME` option. + stated by `--output-file`. The output format is `simple` by default. + Other inbuilt formats are `json` and `html` which you can specify + using the `--format` option. """ - from xrlint.cli.engine import CliEngine + from xrlint.cli.engine import XRLint if init_mode: - CliEngine.init_config_file() - raise click.exceptions.Exit(0) + XRLint.init_config_file() + return - if not files: - raise click.ClickException("No dataset files provided.") - - cli_engine = CliEngine( - no_default_config=no_default_config, + cli_engine = XRLint( + no_config_lookup=no_config_lookup, config_path=config_path, plugin_specs=plugin_specs, rule_specs=rule_specs, - files=files, output_format=output_format, output_path=output_file, - styled=color_enabled, + output_styled=color_enabled, + max_warnings=max_warnings, ) - stats = Stats() - config_list = cli_engine.load_config() - results = cli_engine.verify_datasets(config_list) - results = stats.collect(results) - report = cli_engine.format_results(results) - cli_engine.write_report(report) + if inspect_path: + cli_engine.load_config_list() + cli_engine.print_config_for_file(inspect_path) + return + + if files: + cli_engine.load_config_list() + results = cli_engine.verify_datasets(files) + report = cli_engine.format_results(results) + cli_engine.write_report(report) - error_status = stats.error_count > 0 - max_warn_status = stats.warning_count > max_warnings - if max_warn_status and not error_status: - click.echo("maximum number of warnings exceeded.") - if max_warn_status 
or error_status: - raise click.exceptions.Exit(1) + result_stats = cli_engine.result_stats + error_status = result_stats.error_count > 0 + max_warn_status = result_stats.warning_count > max_warnings + if max_warn_status and not error_status: + click.echo("Maximum number of warnings exceeded.") + if max_warn_status or error_status: + raise click.exceptions.Exit(1) if __name__ == "__main__": # pragma: no cover diff --git a/xrlint/cli/stats.py b/xrlint/cli/stats.py deleted file mode 100644 index 0472d8e..0000000 --- a/xrlint/cli/stats.py +++ /dev/null @@ -1,19 +0,0 @@ -from collections.abc import Iterable -from dataclasses import dataclass - -from xrlint.result import Result - - -@dataclass() -class Stats: - """Utility to collect simple statistics from results.""" - - error_count: int = 0 - warning_count: int = 0 - - def collect(self, results: Iterable[Result]) -> Iterable[Result]: - """Collect statistics from `results`.""" - for result in results: - self.error_count += result.error_count - self.warning_count += result.warning_count - yield result diff --git a/xrlint/config.py b/xrlint/config.py index f085241..b045ef8 100644 --- a/xrlint/config.py +++ b/xrlint/config.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from functools import cached_property -from typing import Any, TYPE_CHECKING, Union +from typing import Any, TYPE_CHECKING, Union, Literal from xrlint.constants import CORE_PLUGIN_NAME from xrlint.util.filefilter import FileFilter @@ -28,22 +28,30 @@ def get_core_plugin() -> "Plugin": return export_plugin() -def get_core_config(recommended: bool = False): +def get_core_config(config_name: Literal["all", "recommended"] | None = None): """Create a base configuration for the built-in plugins. Args: - recommended: `True` (the default) if the recommended - rule configurations of the built-in plugins should be used. 
+ config_name: `"recommended"` if the recommended configuration + of the builtin rules should be used, or `"all"` if all rules + shall be used. Pass `None` (the default) if you don't want this. + In the latter case, you should configure the `rules` + option either in `config` or `config_kwargs`. Otherwise, calling + `verify_dataset()` without any rule configuration will never + succeed for any given dataset. Returns: A new `Config` object """ core_plugin = get_core_plugin() - return Config( + config = Config( plugins={ CORE_PLUGIN_NAME: core_plugin, }, - rules=core_plugin.configs["recommended"].rules if recommended else None, ) + if config_name: + return config.merge(core_plugin.configs[config_name]) + else: + return config def split_config_spec(config_spec: str) -> tuple[str, str]: @@ -369,6 +377,23 @@ def _parse_options(cls, name: str, config_dict: dict) -> dict[str, Any]: if settings is not None: raise TypeError(format_message_type_of(name, settings, "dict[str,Any]")) + def to_dict(self): + d = super().to_dict() + plugins: dict[str, Plugin] | None = d.get("plugins") + if plugins is not None: + d["plugins"] = {k: v.meta.module or "?" 
for k, v in plugins.items()} + rules: dict[str, RuleConfig] | None = d.get("rules") + if rules is not None: + d["rules"] = { + k: ( + v.severity + if not (v.args or v.kwargs) + else [v.severity, v.args, v.kwargs] + ) + for k, v in rules.items() + } + return d + @dataclass(frozen=True) class ConfigList: diff --git a/xrlint/formatter.py b/xrlint/formatter.py index 4052ceb..aed79df 100644 --- a/xrlint/formatter.py +++ b/xrlint/formatter.py @@ -4,15 +4,22 @@ from typing import Any, Callable, Type from xrlint.result import Result +from xrlint.result import ResultStats from xrlint.util.naming import to_kebab_case -class FormatterContext: +class FormatterContext(ABC): """A formatter context is passed to `FormatOp`.""" - def __init__(self, max_warnings_exceeded: bool = False): - self.max_warnings_exceeded = max_warnings_exceeded - """`True` if the maximum number of results has been exceeded.""" + @property + @abstractmethod + def max_warnings_exceeded(self) -> bool: + """`True` if the maximum number of warnings has been exceeded.""" + + @property + @abstractmethod + def result_stats(self) -> ResultStats: + """Get current result statistics.""" class FormatterOp(ABC): diff --git a/xrlint/linter.py b/xrlint/linter.py index 943910e..11b3847 100644 --- a/xrlint/linter.py +++ b/xrlint/linter.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Literal from xrlint.config import Config from xrlint.config import get_core_config @@ -8,16 +8,18 @@ def new_linter( - recommended: bool = False, + config_name: Literal["all", "recommended"] | None = None, + *, config: Config | dict | None = None, **config_kwargs: dict[str, Any], ) -> "Linter": - """Create a new `Linter` with core rules loaded. + """Create a new `Linter` with the given configuration. Args: - recommended: `True` if the recommended configurations of the builtin - rules should be used. 
- If set to `False` (the default), you should configure the `rules` + config_name: `"recommended"` if the recommended configuration + of the builtin rules should be used, or `"all"` if all rules + shall be used. Pass `None` (the default) if you don't want this. + In the latter case, you should configure the `rules` option either in `config` or `config_kwargs`. Otherwise, calling `verify_dataset()` without any rule configuration will never succeed for any given dataset. @@ -28,7 +30,7 @@ def new_linter( A new linter instance """ return Linter( - config=merge_configs(get_core_config(recommended=recommended), config), + config=merge_configs(get_core_config(config_name=config_name), config), **config_kwargs, ) diff --git a/xrlint/plugin.py b/xrlint/plugin.py index 0be9a94..7f8e6aa 100644 --- a/xrlint/plugin.py +++ b/xrlint/plugin.py @@ -18,6 +18,9 @@ class PluginMeta: version: str = "0.0.0" """Plugin version.""" + module: str | None = None + """Plugin module.""" + @dataclass(frozen=True, kw_only=True) class Plugin: diff --git a/xrlint/plugins/core/rules/__init__.py b/xrlint/plugins/core/rules/__init__.py index 96164a5..def96e7 100644 --- a/xrlint/plugins/core/rules/__init__.py +++ b/xrlint/plugins/core/rules/__init__.py @@ -1,6 +1,13 @@ from xrlint.constants import CORE_PLUGIN_NAME from xrlint.plugin import Plugin from xrlint.plugin import PluginMeta +from xrlint.version import version -plugin = Plugin(meta=PluginMeta(name=CORE_PLUGIN_NAME, version="0.0.1")) +plugin = Plugin( + meta=PluginMeta( + name=CORE_PLUGIN_NAME, + version=version, + module=__package__.rsplit(".", maxsplit=1)[0], + ) +) diff --git a/xrlint/plugins/xcube/rules/__init__.py b/xrlint/plugins/xcube/rules/__init__.py index 086c31f..e7531f2 100644 --- a/xrlint/plugins/xcube/rules/__init__.py +++ b/xrlint/plugins/xcube/rules/__init__.py @@ -1,5 +1,12 @@ from xrlint.plugin import Plugin from xrlint.plugin import PluginMeta +from xrlint.version import version -plugin = Plugin(meta=PluginMeta(name="xcube", 
version="0.0.1")) +plugin = Plugin( + meta=PluginMeta( + name="xcube", + version=version, + module=__package__.rsplit(".", maxsplit=1)[0], + ) +) diff --git a/xrlint/result.py b/xrlint/result.py index 2c13e34..b6e6311 100644 --- a/xrlint/result.py +++ b/xrlint/result.py @@ -1,3 +1,4 @@ +from collections.abc import Iterable from dataclasses import dataclass, field from typing import Literal, TYPE_CHECKING, Any, Union import html @@ -196,3 +197,20 @@ def get_rules_meta_for_results(results: list[Result]) -> dict[str, "RuleMeta"]: rule = result.config.get_rule(message.rule_id) rules_meta[message.rule_id] = rule.meta return rules_meta + + +@dataclass() +class ResultStats: + """Utility for collecting simple statistics from results.""" + + error_count: int = 0 + warning_count: int = 0 + result_count: int = 0 + + def collect(self, results: Iterable[Result]) -> Iterable[Result]: + """Collect statistics from `results`.""" + for result in results: + self.error_count += result.error_count + self.warning_count += result.warning_count + self.result_count += 1 + yield result diff --git a/xrlint/testing.py b/xrlint/testing.py index e1bf5e0..d172693 100644 --- a/xrlint/testing.py +++ b/xrlint/testing.py @@ -46,10 +46,10 @@ class RuleTester: """Utility that helps to test rules. Args: - config: optional xrlint configuration + config: optional XRLint configuration. """ - def __init__(self, **config): + def __init__(self, **config: dict[str, Any]): self._config = config def run(