Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5819509
initial version of a convention converter
keewis Sep 16, 2025
f3a51ab
implement the `easygems` convention
keewis Sep 16, 2025
d7636d3
implement a generalized cf convention
keewis Sep 17, 2025
433e759
implement a converter to the `xdggs` convention
keewis Sep 17, 2025
7234042
don't require `convention` as a public kwarg
keewis Sep 17, 2025
55b0566
refactor the conventions module
keewis Oct 8, 2025
e9474b9
refactor the decoding machinery
keewis Oct 8, 2025
8e33eb5
typo
keewis Oct 8, 2025
27e3a19
use the registry objects in the accessor
keewis Oct 8, 2025
4635a02
remove the `grid_name` attr from the metadata
keewis Oct 8, 2025
cbe7698
implement the cf convention decoder
keewis Oct 8, 2025
012da31
default to `name=None` and override it in the xdggs convention
keewis Oct 8, 2025
ac4ebe5
don't try to guess the convention
keewis Oct 8, 2025
45c7927
extend the docstring of `decode`
keewis Oct 8, 2025
5dc6272
tests for the xdggs convention decoder
keewis Oct 9, 2025
145dbed
add `h3-py` to the dev deps
keewis Oct 9, 2025
156b1e5
override the variable metadata
keewis Oct 9, 2025
c9b190b
use the correct metadata for the convention
keewis Oct 9, 2025
6e87ff7
Merge branch 'main' into metadata-conventions
keewis Oct 9, 2025
7317f6e
add jupyterlab-myst
keewis Oct 9, 2025
d40bc56
pass index options along
keewis Oct 9, 2025
c1fbd16
bump lock file
keewis Oct 9, 2025
1906e86
pass index options along
keewis Oct 9, 2025
f29e55d
only try to convert dataset objects
keewis Oct 9, 2025
4018668
move the cell id creation into a separate function
keewis Oct 9, 2025
60dc4e8
refactor the index creation function
keewis Oct 9, 2025
0acb855
add tests for the cf convention decoder
keewis Oct 9, 2025
ebef960
refactor the index creation
keewis Oct 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7,956 changes: 4,172 additions & 3,784 deletions pixi.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,10 @@ jupyterlab = "*"
jupyter-resource-usage = "*"
jupyterlab_code_formatter = "*"
python-build = ">=1.3.0,<2"
h3-py = ">=4.3.0,<5"
geopandas = ">=1.1.1,<2"
pyinstrument = ">=5.1.1,<6"
jupyterlab-myst = ">=2.4.2,<3"

[tool.pixi.environments]
nightly = { features = ["tests", "nightly"], no-default-feature = true }
Expand Down
69 changes: 59 additions & 10 deletions xdggs/accessor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy.typing as npt
import xarray as xr

from xdggs import conventions
from xdggs.grid import DGGSInfo
from xdggs.index import DGGSIndex
from xdggs.plotting import explore
Expand Down Expand Up @@ -30,7 +31,13 @@ def __init__(self, obj: xr.Dataset | xr.DataArray):
self._index = index

def decode(
self, grid_info=None, *, name="cell_ids", index_options=None, **index_kwargs
self,
grid_info=None,
*,
name="cell_ids",
convention="xdggs",
index_options=None,
**index_kwargs,
) -> xr.Dataset | xr.DataArray:
"""decode the DGGS cell ids

Expand All @@ -39,8 +46,22 @@ def decode(
grid_info : dict or DGGSInfo, optional
Override the grid parameters on the dataset. Useful to set attributes on
the dataset.
name : str, default: "cell_ids"
The name of the coordinate containing the cell ids.
name : str, optional
The name of the coordinate containing the cell ids. The default name
depends on the convention.
convention : str, default: "xdggs"
The name of the metadata convention. Built-in conventions are:

- "xdggs": the existing xdggs convention. ``name`` points to the
coordinate containing cell ids, and which has all the grid
metadata. The ``name`` parameter defaults to ``"cell_ids"``.
- "cf": the upcoming CF convention standardization. While the
convention extension is specialized on ``healpix`` for now, the
decoder can work with other DGGS as well. For this, all metadata
lives on a variable with a ``grid_mapping_name`` attribute, and
the cell ids coordinate is indicated by the ``coordinates``
attribute on data variables / other coordinates (this can be
overridden by the ``name`` parameter).
index_options, **index_kwargs : dict, optional
Additional options to forward to the index.

Expand All @@ -49,18 +70,24 @@ def decode(
obj : xarray.DataArray or xarray.Dataset
The object with a DGGS index on the cell id coordinate.
"""
var = self._obj[name]
if isinstance(grid_info, DGGSInfo):
grid_info = grid_info.to_dict()
if isinstance(grid_info, dict):
var.attrs = grid_info
if callable(convention):
decoder = convention
else:
decoder = conventions._decoders.get(convention)
if decoder is None:
valid_names = conventions._decoders.keys()
raise ValueError(
f"unknown convention: {convention}."
f" Choose a known convention: {', '.join(valid_names)}"
)

if index_options is None:
index_options = {}

return self._obj.drop_indexes(name, errors="ignore").set_xindex(
name, DGGSIndex, **(index_options | index_kwargs)
coords = decoder(
self._obj, grid_info=grid_info, name=name, index_options=index_options
)
return self._obj.assign_coords(coords)

@property
def index(self) -> DGGSIndex:
Expand Down Expand Up @@ -243,3 +270,25 @@ def explore(self, *, cmap="viridis", center=None, alpha=None, coords=None):
alpha=alpha,
coords=coords,
)

def as_convention(self, convention: str):
    """Convert the object to a specific metadata convention.

    Parameters
    ----------
    convention : str
        The name of a registered convention encoder. Built-in encoders are:

        - "easygems": adds a scalar ``crs`` coordinate carrying the healpix
          grid mapping metadata, renames the cell dimension and the cell ids
          coordinate to ``cell`` and sets a default index on ``cell``.
        - "cf": adds a scalar ``crs`` coordinate carrying the grid metadata
          and sets the ``coordinates`` / ``grid_mapping`` attributes on the
          variables that use the cell dimension.
        - "xdggs": stores the grid metadata as attributes of the cell ids
          coordinate.

    Returns
    -------
    obj : xr.DataArray or xr.Dataset
        The object converted to the given convention.

    Raises
    ------
    ValueError
        If no encoder is registered for ``convention``.
    """
    converter = conventions._encoders.get(convention)
    if converter is None:
        # Mirror the error raised by ``decode``: tell the user which
        # conventions are actually available.
        valid_names = ", ".join(conventions._encoders)
        raise ValueError(
            f"unknown convention: {convention}."
            f" Choose a known convention: {valid_names}"
        )

    return converter(self._obj)
24 changes: 24 additions & 0 deletions xdggs/conventions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from xdggs.conventions import decoders, encoders # noqa: F401
from xdggs.conventions.registry import decoders as _decoders
from xdggs.conventions.registry import encoders as _encoders # noqa: F401
from xdggs.conventions.registry import (
register_decoder,
register_encoder,
)


class DecoderError(Exception):
    """Signal that a convention decoder does not apply to an object.

    ``detect_decoder`` catches this exception and moves on to the next
    registered decoder.
    """


def detect_decoder(obj, grid_info, name, index_options=None):
    """Return the result of the first registered decoder that accepts ``obj``.

    The decoders are tried in registration order. A decoder signals that it
    does not apply by raising ``DecoderError``; any other exception propagates
    to the caller.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to decode.
    grid_info : dict or DGGSInfo or None
        Grid parameters forwarded to every decoder.
    name : str or None
        Name of the cell ids coordinate, forwarded to every decoder.
    index_options : dict, optional
        Options forwarded to every decoder. The registered decoders require
        this argument, so it is always passed (``None`` by default).

    Returns
    -------
    coords
        Whatever the first matching decoder returns.

    Raises
    ------
    ValueError
        If every registered decoder raised ``DecoderError``.
    """
    for decoder in _decoders.values():
        try:
            return decoder(
                obj, grid_info=grid_info, name=name, index_options=index_options
            )
        except DecoderError:
            # This convention does not match; try the next one.
            continue

    raise ValueError("cannot detect a matching convention")


# Public API of the conventions package: only the registration decorators.
__all__ = ["register_decoder", "register_encoder"]
78 changes: 78 additions & 0 deletions xdggs/conventions/decoders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import xarray as xr

from xdggs.conventions.registry import register_decoder
from xdggs.grid import DGGSInfo
from xdggs.utils import GRID_REGISTRY, call_on_dataset


@register_decoder("xdggs")
def xdggs(obj, grid_info, name, index_options):
    """Decode an object that follows the ``xdggs`` convention.

    In this convention the grid metadata lives in the attributes of the cell
    ids coordinate itself.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to decode.
    grid_info : dict or DGGSInfo or None
        Grid parameters. If ``None``, the attributes of the cell ids
        coordinate are used.
    name : str or None
        Name of the cell ids coordinate. Defaults to ``"cell_ids"``.
    index_options : dict or None
        Options forwarded to the index class' ``from_variables``.

    Returns
    -------
    coords : xarray.Coordinates
        The cell ids coordinate together with its DGGS index.

    Raises
    ------
    ValueError
        If the coordinate cannot be found, is not 1D, or names a grid that is
        not in ``GRID_REGISTRY``.
    """
    if name is None:
        name = "cell_ids"

    try:
        var = obj[name]
    except KeyError as err:
        # xarray raises KeyError (not IndexError) for unknown names.
        raise ValueError("Cannot find the cell ids coordinate") from err

    if len(var.dims) != 1:
        # TODO: allow 0D
        raise ValueError("cell id coordinate must be 1D")

    if grid_info is None:
        grid_info = var.attrs
    elif isinstance(grid_info, DGGSInfo):
        # TODO: avoid serializing / deserializing cycle
        grid_info = grid_info.to_dict()

    grid_name = grid_info["grid_name"]
    if grid_name not in GRID_REGISTRY:
        raise ValueError(f"unknown grid name: {grid_name}")
    index_cls = GRID_REGISTRY[grid_name]

    # Build the index from a copy so that overriding the attributes does not
    # touch the variable stored on ``obj``.
    var_ = var.copy(deep=True)
    var_.attrs = grid_info
    index = index_cls.from_variables({name: var_}, options=index_options)

    return xr.Coordinates({name: var.variable}, indexes={name: index})


@register_decoder("cf")
def cf(obj, grid_info, name, index_options):
    """Decode an object that follows the (upcoming) CF DGGS convention.

    The grid metadata is read from the single variable that has a
    ``grid_mapping_name`` attribute. The cell ids coordinate is taken from
    ``name`` or, if ``name`` is ``None``, from the first ``coordinates``
    attribute found on the variables.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to decode.
    grid_info : dict or DGGSInfo or None
        NOTE(review): this argument is currently not read; the grid metadata
        always comes from the grid mapping variable. Confirm whether
        overrides should be supported.
    name : str or None
        Name of the cell ids coordinate.
    index_options : dict or None
        Options forwarded to the index class' ``from_variables``.

    Returns
    -------
    coords : xarray.Coordinates
        The cell ids coordinate (with the translated grid metadata as
        attributes) together with its DGGS index.

    Raises
    ------
    ValueError
        If there is not exactly one grid mapping variable, if the cell ids
        coordinate cannot be inferred, or if the grid name is not in
        ``GRID_REGISTRY``.
    """
    vars_ = call_on_dataset(
        lambda ds: ds.variables,
        obj,
    )
    grid_mapping_vars = {
        var_name: var
        for var_name, var in vars_.items()
        if "grid_mapping_name" in var.attrs
    }
    if len(grid_mapping_vars) != 1:
        raise ValueError("needs exactly one grid mapping variable for now")
    crs = next(iter(grid_mapping_vars.values()))

    if name is None:
        # dict.fromkeys de-duplicates while keeping first-seen order.
        coords = list(
            dict.fromkeys(
                var.attrs["coordinates"]
                for var in vars_.values()
                if "coordinates" in var.attrs
            )
        )
        if not coords:
            raise ValueError(
                "cannot infer the cell ids coordinate: no variable has a"
                " `coordinates` attribute. Pass `name` explicitly."
            )
        name = coords[0]

    # CF attribute names that differ from the names used for the grid info.
    translations = {"refinement_level": "level"}
    grid_info = {
        translations.get(key, key): value for key, value in crs.attrs.items()
    }
    grid_name = grid_info.pop("grid_mapping_name")
    var = vars_[name].copy(deep=False)
    var.attrs = grid_info

    if grid_name not in GRID_REGISTRY:
        raise ValueError(f"unknown grid name: {grid_name}")
    index_cls = GRID_REGISTRY[grid_name]

    index = index_cls.from_variables({name: var}, options=index_options)

    return xr.Coordinates({name: var}, indexes={name: index})
88 changes: 88 additions & 0 deletions xdggs/conventions/encoders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import numpy as np
import xarray as xr

from xdggs.conventions.registry import register_encoder
from xdggs.utils import GRID_REGISTRY, call_on_dataset


def infer_grid_name(index):
    """Return the ``GRID_REGISTRY`` name whose class is exactly ``type(index)``.

    Raises
    ------
    ValueError
        If no registered class is the type of ``index`` (subclasses do not
        match).
    """
    index_type = type(index)
    not_found = object()

    grid_name = next(
        (
            candidate
            for candidate, registered in GRID_REGISTRY.items()
            if registered is index_type
        ),
        not_found,
    )
    if grid_name is not_found:
        raise ValueError("unknown index")

    return grid_name


@register_encoder("xdggs")
def xdggs(obj):
    """Encode ``obj`` using the ``xdggs`` convention.

    The grid metadata (the grid name plus the serialized grid info) is
    attached as attributes to the cell ids coordinate.
    """

    def _convert(ds):
        cell_coord = ds.dggs._name

        # Start with the inferred grid name; entries of the serialized grid
        # info take precedence on key collisions.
        attrs = {"grid_name": infer_grid_name(ds.dggs.index)}
        attrs |= ds.dggs.grid_info.to_dict()

        def _with_metadata(dataset):
            return dataset[cell_coord].assign_attrs(attrs)

        return ds.assign_coords({cell_coord: _with_metadata})

    return call_on_dataset(_convert, obj)


@register_encoder("easygems")
def easygems(obj):
    """Encode ``obj`` using the ``easygems`` convention.

    Adds a scalar ``crs`` coordinate carrying the healpix grid mapping
    metadata, drops the DGGS index, renames the cell dimension and the cell
    ids coordinate to ``cell`` and sets a default index on ``cell``.

    Raises
    ------
    ValueError
        If the indexing scheme of the grid is neither ``"nested"`` nor
        ``"ring"``.
    """
    # xdggs indexing scheme -> easygems ``healpix_order`` value
    orders = {"nested": "nest", "ring": "ring"}

    def _convert(ds):
        grid_info = ds.dggs.grid_info
        dim = ds.dggs.index._dim
        coord = ds.dggs._name

        scheme = grid_info.indexing_scheme
        order = orders.get(scheme)
        if order is None:
            # Report the offending scheme, not the (always ``None``) lookup
            # result.
            raise ValueError(f"easygems: unsupported indexing scheme: {scheme}")

        metadata = {
            "grid_mapping_name": "healpix",
            "healpix_nside": grid_info.nside,
            "healpix_order": order,
        }
        crs = xr.Variable((), np.int8(0), metadata)

        return (
            ds.assign_coords(crs=crs)
            .drop_indexes(coord)
            .rename_dims({dim: "cell"})
            .rename_vars({coord: "cell"})
            .set_xindex("cell")
        )

    return call_on_dataset(_convert, obj)


@register_encoder("cf")
def cf(obj):
    """Encode ``obj`` using the CF-style convention.

    Adds a scalar ``crs`` coordinate holding the grid metadata (with
    ``level`` stored as ``refinement_level`` and the grid name stored as
    ``grid_mapping_name``), tags every variable on the cell dimension with
    ``coordinates`` / ``grid_mapping`` attributes and sets ``standard_name``
    and ``units`` on the cell ids coordinate.
    """

    def _convert(ds):
        info = ds.dggs.grid_info
        cell_dim = ds.dggs.index._dim
        cell_coord = ds.dggs._name

        mapping_name = infer_grid_name(ds.dggs.index)
        crs_attrs = info.to_dict() | {"grid_mapping_name": mapping_name}
        crs_attrs["refinement_level"] = crs_attrs.pop("level")
        crs_attrs.pop("grid_name", None)
        crs = xr.Variable((), np.int8(0), crs_attrs)

        data_attrs = {"coordinates": cell_coord, "grid_mapping": "crs"}
        cell_attrs = {"standard_name": "healpix_index", "units": "1"}

        # Work on a shallow copy so the attributes of ``ds`` are not the
        # ones being updated.
        result = ds.copy(deep=False)
        for var_name, variable in result.variables.items():
            # Only variables on the cell dimension, other than the cell ids
            # coordinate itself, reference the grid.
            if var_name != cell_coord and cell_dim in variable.dims:
                variable.attrs |= data_attrs

        result[cell_coord].attrs |= cell_attrs

        return result.assign_coords({"crs": crs})

    return call_on_dataset(_convert, obj)
36 changes: 36 additions & 0 deletions xdggs/conventions/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import warnings

# Convention name -> decoder function; filled in by `register_decoder`.
decoders = {}
# Convention name -> encoder function; filled in by `register_encoder`.
encoders = {}


class DecoderWarning(UserWarning):
    """Warning emitted when a registered convention function is overwritten."""


def register_decoder(name):
    """Return a decorator that registers a function as the decoder ``name``.

    The decorated function is stored in ``decoders`` under ``name`` and
    returned unchanged. If ``name`` is already taken, a ``DecoderWarning`` is
    emitted and the previous decoder is replaced.
    """

    def register(func):
        already_registered = name in decoders
        if already_registered:
            message = f"Overwriting existing convention decoder {name!r}."
            warnings.warn(DecoderWarning(message))

        decoders[name] = func
        return func

    return register


def register_encoder(name):
    """Return a decorator that registers a function as the encoder ``name``.

    The decorated function is stored in ``encoders`` under ``name`` and
    returned unchanged. If ``name`` is already taken, a ``DecoderWarning`` is
    emitted (the same warning class is used for decoders and encoders) and
    the previous encoder is replaced.
    """

    def register(func):
        already_registered = name in encoders
        if already_registered:
            message = f"Overwriting existing convention encoder {name!r}."
            warnings.warn(DecoderWarning(message))

        encoders[name] = func
        return func

    return register
Loading
Loading