diff --git a/CHANGES.md b/CHANGES.md index ee54e97..51aeb22 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -23,6 +23,8 @@ dictionary, or a name that refers to a named configuration of a plugin. - Other changes: + - Property `config` of `Linter` now returns a `ConfigList` instead + of a `Config` object. - Directories that are recognized by file patterns associated with a non-empty configuration object are no longer recursively traversed. - Node path names now contain the dataset index if a file path diff --git a/notebooks/mkdataset.py b/notebooks/mkdataset.py index c15f0cf..28d4c24 100644 --- a/notebooks/mkdataset.py +++ b/notebooks/mkdataset.py @@ -36,7 +36,7 @@ def make_dataset() -> xr.Dataset: attrs={ "standard_name": "time", "long_name": "time", - "units": "days since 2020-01-01 utc", + "units": "days since 2020-01-01 +0:00", "calendar": "gregorian", }, ), @@ -71,7 +71,7 @@ def make_dataset_with_issues() -> xr.Dataset: invalid_ds.x.attrs["axis"] = "x" del invalid_ds.y.attrs["standard_name"] invalid_ds.y.attrs["axis"] = "y" - invalid_ds.time.attrs["units"] = "days since 2020-01-01 ß0:000:00" + invalid_ds.time.attrs["units"] = "days since 2020-01-01 UTC" invalid_ds.attrs = {} invalid_ds.sst.attrs["units"] = 1 invalid_ds["sst_avg"] = xr.DataArray( diff --git a/notebooks/xrlint-linter.ipynb b/notebooks/xrlint-linter.ipynb index 0c1e987..cc8a02d 100644 --- a/notebooks/xrlint-linter.ipynb +++ b/notebooks/xrlint-linter.ipynb @@ -39,7 +39,7 @@ { "data": { "text/plain": [ - "'0.3.0.dev0'" + "'0.4.0.dev0'" ] }, "execution_count": 2, @@ -51,6 +51,13 @@ "xrl.version" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a valid xarray dataset for demonstration:" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -437,38 +444,38 @@ " * time (time) int64 32B 0 365 730 1095\n", " spatial_ref int64 8B 0\n", "Data variables:\n", - " sst (time, y, x) float64 192B 0.357 0.4776 0.8956 ... 0.2892 0.1911\n", - " sst_anomaly (time, y, x) float64 192B 0.1693 0.4643 ... 0.6467 0.9646\n", + " sst (time, y, x) float64 192B 0.8258 0.8193 ... 0.7022 0.5899\n", + " sst_anomaly (time, y, x) float64 192B 0.4368 0.9209 ... 0.1712 0.1026\n", "Attributes:\n", - " title: SST-Climatology Subset" + " title: SST-Climatology Subset" ], "text/plain": [ " Size: 464B\n", @@ -479,8 +486,8 @@ " * time (time) int64 32B 0 365 730 1095\n", " spatial_ref int64 8B 0\n", "Data variables:\n", - " sst (time, y, x) float64 192B 0.357 0.4776 0.8956 ... 0.2892 0.1911\n", - " sst_anomaly (time, y, x) float64 192B 0.1693 0.4643 ... 0.6467 0.9646\n", + " sst (time, y, x) float64 192B 0.8258 0.8193 ... 0.7022 0.5899\n", + " sst_anomaly (time, y, x) float64 192B 0.4368 0.9209 ... 0.1712 0.1026\n", "Attributes:\n", " title: SST-Climatology Subset" ] @@ -516,7 +523,7 @@ "

<dataset> - ok

\n" ], "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.3.0.dev0', ref='xrlint.plugins.core:export_plugin'), rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'lat-coordinate': Rule(meta=RuleMeta(name='lat-coordinate', version='1.0.0', description='Latitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate', type='problem'), op_class=), 'lon-coordinate': Rule(meta=RuleMeta(name='lon-coordinate', version='1.0.0', description='Longitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#longitude-coordinate', type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'time-coordinate': Rule(meta=RuleMeta(name='time-coordinate', version='1.0.0', description=\"Time coordinate (standard_name='time') should have unambiguous time units encoding.\", schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate', type='problem'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", schema=None, ref=None, docs_url=None, type='suggestion'), op_class=)}, processors={}, configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=0)" + "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.4.0.dev0', ref='xrlint.plugins.core:export_plugin'), rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'flags': Rule(meta=RuleMeta(name='flags', version='1.0.0', description=\"Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing. \", schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#flags', type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'lat-coordinate': Rule(meta=RuleMeta(name='lat-coordinate', version='1.0.0', description='Latitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate', type='problem'), op_class=), 'lon-coordinate': Rule(meta=RuleMeta(name='lon-coordinate', version='1.0.0', description='Longitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#longitude-coordinate', type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'no-empty-chunks': Rule(meta=RuleMeta(name='no-empty-chunks', version='1.0.0', description='Empty chunks should not be encoded and written. The rule currently applies to Zarr format only.', schema=None, ref=None, docs_url='https://docs.xarray.dev/en/stable/generated/xarray.Dataset.to_zarr.html#xarray-dataset-to-zarr', type='suggestion'), op_class=), 'time-coordinate': Rule(meta=RuleMeta(name='time-coordinate', version='1.0.0', description='Time coordinates should have valid and unambiguous time units encoding.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate', type='problem'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", schema=None, ref=None, docs_url=None, type='suggestion'), op_class=)}, processors={}, configs={'recommended': [Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'flags': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'no-empty-chunks': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None)], 'all': [Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'flags': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-chunks': RuleConfig(severity=2, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)]})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'flags': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'no-empty-chunks': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[], fixable_error_count=0, fixable_warning_count=0, error_count=0, fatal_error_count=0, warning_count=0)" ] }, "execution_count": 5, @@ -528,6 +535,13 @@ "linter.verify_dataset(ds)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a xarray dataset with some issues for demonstration:" + ] + }, { "cell_type": "code", "execution_count": 6, @@ -914,38 +928,38 @@ " * time (time) int64 32B 0 365 730 1095\n", " spatial_ref int64 8B 0\n", "Data variables:\n", - " sst (time, y, x) float64 192B 0.9673 0.8684 ... 0.6397 0.2261\n", - " sst_anomaly (time, y, x) float64 192B 0.5013 0.1469 0.03853 ... 0.51 0.9694\n", - " sst_avg (x, y) float64 48B 0.5494 0.02223 0.5723 0.4118 0.1869 0.872" + " sst (time, y, x) float64 192B 0.6958 0.9708 ... 0.2524 0.9463\n", + " sst_anomaly (time, y, x) float64 192B 0.5294 0.5702 ... 0.3938 0.9155\n", + " sst_avg (x, y) float64 48B 0.812 0.2143 0.8362 0.9635 0.1922 0.518" ], "text/plain": [ " Size: 512B\n", @@ -956,9 +970,9 @@ " * time (time) int64 32B 0 365 730 1095\n", " spatial_ref int64 8B 0\n", "Data variables:\n", - " sst (time, y, x) float64 192B 0.9673 0.8684 ... 0.6397 0.2261\n", - " sst_anomaly (time, y, x) float64 192B 0.5013 0.1469 0.03853 ... 0.51 0.9694\n", - " sst_avg (x, y) float64 48B 0.5494 0.02223 0.5723 0.4118 0.1869 0.872" + " sst (time, y, x) float64 192B 0.6958 0.9708 ... 0.2524 0.9463\n", + " sst_anomaly (time, y, x) float64 192B 0.5294 0.5702 ... 0.3938 0.9155\n", + " sst_avg (x, y) float64 48B 0.812 0.2143 0.8362 0.9635 0.1922 0.518" ] }, "execution_count": 6, @@ -984,19 +998,19 @@ "

<dataset>:

\n", "\n", "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", "\n", "
dataset warn Missing 'title' attribute in dataset. dataset-title-attr
dataset.coords['y'] errorAttribute 'standard_name' should be 'latitude', was None. lat-coordinate
dataset.coords['y'] errorAttribute 'axis' should be 'Y', was 'y'. lat-coordinate
dataset.coords['x'] errorAttribute 'units' should be 'degrees_east', was 'degrees'. lon-coordinate
dataset.coords['x'] errorAttribute 'axis' should be 'X', was 'x'. lon-coordinate
dataset.attrs warn Missing metadata, attributes are empty. no-empty-attrs
dataset.coords['time'] errorMissing timezone in 'units' attribute: days since 2020-01-01 ß0:000:00time-coordinate
dataset.data_vars['sst']warn Invalid 'units' attribute in variable 'sst'. var-units-attr
dataset warn Missing 'title' attribute in dataset. dataset-title-attr
dataset.coords['y'] errorAttribute 'standard_name' should be 'latitude', was None. lat-coordinate
dataset.coords['y'] errorAttribute 'axis' should be 'Y', was 'y'. lat-coordinate
dataset.coords['x'] errorAttribute 'units' should be 'degrees_east', was 'degrees'.lon-coordinate
dataset.coords['x'] errorAttribute 'axis' should be 'X', was 'x'. lon-coordinate
dataset.attrs warn Missing metadata, attributes are empty. no-empty-attrs
dataset.coords['time'] errorInvalid 'units' attribute: 'days since 2020-01-01 UTC'. time-coordinate
dataset.data_vars['sst']warn Invalid 'units' attribute in variable 'sst'. var-units-attr

8 problems (5 errors and 3 warnings)

\n" ], "text/plain": [ - "Result(config=Config(name=None, files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins={'__core__': Plugin(meta=PluginMeta(name='__core__', version='0.3.0.dev0', ref='xrlint.plugins.core:export_plugin'), rules={'coords-for-dims': Rule(meta=RuleMeta(name='coords-for-dims', version='1.0.0', description='Dimensions of data variables should have corresponding coordinates.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'dataset-title-attr': Rule(meta=RuleMeta(name='dataset-title-attr', version='1.0.0', description='Datasets should be given a non-empty title.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'grid-mappings': Rule(meta=RuleMeta(name='grid-mappings', version='1.0.0', description='Grid mappings, if any, shall have valid grid mapping coordinate variables.', schema=None, ref=None, docs_url=None, type='problem'), op_class=), 'lat-coordinate': Rule(meta=RuleMeta(name='lat-coordinate', version='1.0.0', description='Latitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate', type='problem'), op_class=), 'lon-coordinate': Rule(meta=RuleMeta(name='lon-coordinate', version='1.0.0', description='Longitude coordinate should have standard units and standard names.', schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#longitude-coordinate', type='problem'), op_class=), 'no-empty-attrs': Rule(meta=RuleMeta(name='no-empty-attrs', version='1.0.0', description='Every dataset element should have metadata that describes it.', schema=None, ref=None, docs_url=None, type='suggestion'), op_class=), 'time-coordinate': Rule(meta=RuleMeta(name='time-coordinate', version='1.0.0', description=\"Time coordinate (standard_name='time') should have unambiguous time units encoding.\", schema=None, ref=None, docs_url='https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate', type='problem'), op_class=), 'var-units-attr': Rule(meta=RuleMeta(name='var-units-attr', version='1.0.0', description=\"Every variable should have a valid 'units' attribute.\", schema=None, ref=None, docs_url=None, type='suggestion'), op_class=)}, processors={}, configs={'recommended': Config(name='recommended', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), 'all': Config(name='all', files=None, ignores=None, linter_options=None, opener_options=None, processor=None, plugins=None, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=2, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=2, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=2, args=(), kwargs={})}, settings=None)})}, rules={'coords-for-dims': RuleConfig(severity=2, args=(), kwargs={}), 'dataset-title-attr': RuleConfig(severity=1, args=(), kwargs={}), 'grid-mappings': RuleConfig(severity=2, args=(), kwargs={}), 'lat-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'lon-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'no-empty-attrs': RuleConfig(severity=1, args=(), kwargs={}), 'time-coordinate': RuleConfig(severity=2, args=(), kwargs={}), 'var-units-attr': RuleConfig(severity=1, args=(), kwargs={})}, settings=None), file_path='', messages=[Message(message=\"Missing 'title' attribute in dataset.\", node_path='dataset', rule_id='dataset-title-attr', severity=1, fatal=None, fix=None, suggestions=None), Message(message=\"Attribute 'standard_name' should be 'latitude', was None.\", node_path=\"dataset.coords['y']\", rule_id='lat-coordinate', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"Attribute 'axis' should be 'Y', was 'y'.\", node_path=\"dataset.coords['y']\", rule_id='lat-coordinate', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"Attribute 'units' should be 'degrees_east', was 'degrees'.\", node_path=\"dataset.coords['x']\", rule_id='lon-coordinate', severity=2, fatal=None, fix=None, suggestions=None), Message(message=\"Attribute 'axis' should be 'X', was 'x'.\", node_path=\"dataset.coords['x']\", rule_id='lon-coordinate', severity=2, fatal=None, fix=None, suggestions=None), Message(message='Missing metadata, attributes are empty.', node_path='dataset.attrs', rule_id='no-empty-attrs', severity=1, fatal=None, fix=None, suggestions=[Suggestion(desc='Make sure to add appropriate metadata attributes to dataset elements.', data=None, fix=None)]), Message(message=\"Missing timezone in 'units' attribute: days since 2020-01-01 ß0:000:00\", node_path=\"dataset.coords['time']\", rule_id='time-coordinate', severity=2, fatal=None, fix=None, suggestions=[Suggestion(desc=\"Specify 'units' attribute using format ' since