diff --git a/standard/template/schemas/multiscales.schema.json b/standard/template/schemas/multiscales.schema.json new file mode 100644 index 0000000..0b16ea0 --- /dev/null +++ b/standard/template/schemas/multiscales.schema.json @@ -0,0 +1,100 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.org/schemas/multiscales.schema.json", + "title": "Multiscales Schema", + "description": "Defines the structure of the 'multiscales' attribute for describing multiscale hierarchies in an OverviewSet.", + "type": "object", + "required": ["version", "layout"], + "properties": { + "version": { + "type": "string", + "description": "Version identifier of the multiscales schema (e.g., '1.0')." + }, + "resampling_method": { + "type": "string", + "description": "Default resampling or aggregation method applied across all overview levels.", + "enum": [ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ], + "default": "nearest" + }, + "tile_matrix_ref": { + "description": "Reference to an external grid or tiling definition (e.g., OGC Tile Matrix Set identifier or URI).", + "type": ["string", "object"] + }, + "layout": { + "type": "array", + "description": "Ordered list of Overview Level objects defining the hierarchy from highest to lowest resolution.", + "minItems": 1, + "items": { + "$ref": "#/$defs/overviewLevel" + } + } + }, + "$defs": { + "overviewLevel": { + "title": "Overview Level Object", + "type": "object", + "required": ["id"], + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this overview level (e.g., 'L0', 'L1', 'L2')." + }, + "path": { + "type": "string", + "description": "Logical path identifying the overview level's location within the dataset hierarchy. If omitted, the level is assumed to be a direct child of the OverviewSet, and 'id' is used as the relative path." 
+ }, + "derived_from": { + "type": "string", + "description": "Identifier of another overview level from which this level was derived." + }, + "factors": { + "type": "array", + "description": "Numeric decimation factors per dimension (e.g., [2, 2] for 2× downsampling in X and Y).", + "items": { + "type": "number" + }, + "minItems": 1 + }, + "resampling_method": { + "type": "string", + "description": "Resampling or aggregation method specific to this level. If not provided, the global 'resampling_method' applies.", + "enum": [ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ] + } + }, + "additionalProperties": true + } + }, + "additionalProperties": true +} diff --git a/standard/template/sections/clause_7_part_overviews.adoc b/standard/template/sections/clause_7_part_overviews.adoc new file mode 100644 index 0000000..7e2b6fb --- /dev/null +++ b/standard/template/sections/clause_7_part_overviews.adoc @@ -0,0 +1,235 @@ +=== Overviews + +==== Introduction + +*Overviews* are downscaled representations of gridded data designed to optimise visualisation and scalable access to large datasets. +Overviews, also known as *multiscale pyramids*, provide lower-resolution versions of the same variables, enabling rapid display, efficient zooming, and progressive data exploration. +Multiple overview levels may exist, each representing the same data at a coarser spatial resolution. + +The *Overviews* construct extends the Common Data Model (CDM) by defining a hierarchical organisation of groups and variables that represent data at multiple scales. The *OverviewSet* is self-described by attributes defined at the parent group level, which declare the relationships between its levels and ensure consistent, interoperable multiscale representation within the CDM framework. 
+ +==== Purpose and Scope + +The *Overviews* extension enables scalable access to multidimensional gridded data, particularly for geospatial and remote sensing applications. It supports: + +- Progressive rendering and visualisation at multiple resolutions +- Efficient data transfer for large datasets +- Multi-resolution analysis in analytical or cloud environments +- Consistent representation of raster and data cube structures across scales + +This specification is format-agnostic and may be implemented in any CDM-compliant structure, regardless of physical encoding (e.g. Zarr, NetCDF, GeoTIFF), although the present specification specifically targets Zarr. + +==== Conceptual Model + +The Overviews construct defines a multiscale hierarchy applied to the *variable group*, i.e., the CDM group containing the data variables and their associated metadata, and optionally other related variables that share identical dimensions and coordinate systems. + +Example: Typical CDM Group Structure without overviews + +``` +Group: variable_group/ +├── variable1 +├── variable2 +├── aux_variable +├── coordinates1 +├── coordinates2 +└── Attributes +``` + +Each overview level provides a reduced-resolution representation of the same variables. This approach avoids redundancy by describing the hierarchy once for the entire group rather than for individual variables, ensuring consistency and concise metadata. + +All overview levels are semantically equivalent, differing only in resolution, array extent, or sampling density. +There is no requirement for a single base or reference level—each level may serve as an entry point depending on the application. + + +==== Model Components + +The *Overviews* construct defines the conceptual elements used to represent multiscale data within the GeoZarr data model. It extends the existing CDM concept to support datasets provided at multiple spatial resolutions. 
+ +The construct introduces the following conceptual elements: + +[cols="1,3"] +|=== +|Element |Definition + +|`OverviewSet` |A *group* composed of multiple *OverviewLevels*, each containing equivalent variables defined over the same coordinate system and dimensions but sampled at different spatial resolutions. The *OverviewSet* defines the complete multiscale hierarchy. + +|`OverviewLevel` |A single resolution level within an *OverviewSet*. Each level replicates the structure and semantics of the others, differing only in resolution or extent. + +|`zoom_level` |An optional ordered identifier used to distinguish overview levels (e.g. `0`, `1`, `2` or symbolic identifiers). The ordering indicates relative resolution but does not imply dependency. +|=== + +The *OverviewSet* retains the same structure as a nominal variable group, including the associated metadata and auxiliary variables, so that the multiscale hierarchy preserves the complete descriptive context of the original dataset. + +==== +*Note:* The native-resolution data **MAY** be stored directly in the *OverviewSet* group rather than in a dedicated *OverviewLevel* subgroup. + +This layout is permitted for backward compatibility with existing datasets that were later augmented with multiscale metadata. +However, it is **not recommended**, as it may lead to inconsistent hierarchies or interpretation issues in client applications expecting all resolution levels to be represented as explicit subgroups. +==== + +==== Structural Layout + +The *Overviews* construct is expressed within the Common Data Model (CDM) framework, which represents datasets through *groups*, *variables*, and *attributes*. + +An *OverviewSet* corresponds to a CDM *group* containing multiple *OverviewLevels*, each representing the same data variables at different spatial resolutions. + +Within this structure: + +- **Groups** define the hierarchical organisation of the multiscale data. 
+The *OverviewSet* acts as the parent group, while each *OverviewLevel* is represented as a child group that contains variables with identical names, dimensions, and coordinate definitions. The *OverviewSet* group may also include auxiliary variables and metadata consistent with the structure of a nominal CDM group. + +- **Variables** represent the same physical or derived quantities across resolutions. +Each level contains the same set of data variables and coordinate variables (for example, `x` and `y`) that describe grid geometry at that resolution. + +- **Attributes** describe both the dataset metadata and the relationships between overview levels. +They may appear at the *OverviewSet* or *OverviewLevel* level and are used to define the structure and interpretation of the hierarchy. + + +The complete description of the hierarchy is provided by the `multiscales` property, an attribute of the *OverviewSet* group that lists the available overview levels, their identifiers, and any associated information such as resampling methods or grid references. 
+ +===== OverviewSet CDM-Based Representation + +The following example illustrates the structural organisation of an *OverviewSet* using Common Data Model (CDM) constructs: + +``` +Group: reflectance/ # OverviewSet (Group) +├── Attribute: multiscales # Metadata describing the multiscale hierarchy +├── Attribute: spatial_ref = "EPSG:32633" +├── Auxiliary Variable: quality_flag +├── Group: L0/ # OverviewLevel (highest or nominal resolution) +│ ├── Variable: b01 +│ ├── Variable: b02 +│ ├── Variable: b03 +│ ├── Coordinate Variable: x +│ └── Coordinate Variable: y +├── Group: L1/ # OverviewLevel (coarser resolution) +│ ├── Variable: b01 +│ ├── Variable: b02 +│ ├── Variable: b03 +│ ├── Coordinate Variable: x +│ └── Coordinate Variable: y +└── Group: L2/ # OverviewLevel (coarsest resolution) + ├── Variable: b01 + ├── Variable: b02 + ├── Variable: b03 + ├── Coordinate Variable: x + └── Coordinate Variable: y +``` + +In this representation: + +- The **parent group** (`reflectance/`) corresponds to the *OverviewSet* and defines the common spatial, semantic, and organisational context for all levels. +- Each **child group** (`L0`, `L1`, `L2`) represents an *OverviewLevel*, implemented as a CDM *group* containing variables that share the same names, coordinate variables, and metadata conventions. +- **Variables** (`b01`, `b02`, etc.) represent equivalent physical quantities at different spatial resolutions. + +==== OverviewSet Metadata + +The `multiscales` property is an attribute of the *OverviewSet* group that formally defines the organisation of the multiscale hierarchy. +It provides a structured description of all overview levels, their ordering, and the resampling or aggregation relationships between them. 
+ +The property SHALL be encoded as a structured object formally defined as a JSON Schema available at: +link:../schemas/multiscales.schema.json[Multiscales JSON Schema] + +It defines global attributes applying to the entire hierarchy and a `layout` array that lists all overview levels in order of resolution. + +===== Multiscales Fields + +[cols="1,3"] +|=== +|Field |Definition + +|`version` |**Type:** string. +Version identifier of the multiscales schema. +This field SHALL be present to indicate the version of the schema used. +Example: `"1.0"` + +|`resampling_method` |**Type:** string. +(Optional) Default resampling or aggregation method applied across all levels. +If omitted, resampling may be defined per level. +Allowed values are: `"nearest"`, `"average"`, `"bilinear"`, `"cubic"`, `"cubic_spline"`, `"lanczos"`, `"mode"`, `"max"`, `"min"`, `"med"`, `"sum"`, `"q1"`, `"q3"`, `"rms"`, `"gauss"`. +Default: `"nearest"`. + +|`tile_matrix_ref` |**Type:** string or object. +(Optional) Reference to an external grid or tiling definition (e.g. an OGC Tile Matrix Set identifier or URI) that describes the spatial structure and scale relationships. + +|`layout` |**Type:** array of <<overview-level-object,Overview Level Object>>. +A mandatory array describing each *OverviewLevel* within the hierarchy, ordered from highest to lowest resolution. +Each entry defines the level identifier, its location, and optional derivation information. +|=== + +[[overview-level-object]] +===== Overview Level Object + +Each object in the `layout` array describes one *OverviewLevel* within the multiscale hierarchy. +It defines a unique identifier for the level, its location within the dataset hierarchy, and optionally its derivation from another level. + +[cols="1,3"] +|=== +|Field |Definition + +|`id` |**Type:** string. +Required unique identifier for this overview level. +The identifier SHALL be stable within the dataset and MAY be used for reference in other metadata fields. +Example: `"L0"`, `"L1"`, `"L2"`. + +|`path` |**Type:** string. 
+(Optional) Logical path identifying the location of the overview level within the dataset hierarchy. +If omitted, the level is assumed to be located as a *direct child group* of the *OverviewSet* and the `id` value SHALL be used as the default relative path. +Example: `"L0"`, `"overviews/L2"`. + +|`derived_from` |**Type:** string. +(Optional) Identifier of another overview level from which this level was derived. +Used to express lineage or dependency relationships between levels. +The value SHALL correspond to an existing `id` entry in the same `layout` array. + +|`factors` |**Type:** array of number. +(Optional) Numeric decimation factors per dimension (e.g. `[2, 2]` for a 2× reduction in X and Y). +Used to describe the scaling applied to generate this level from its source. + +|`resampling_method` |**Type:** string. +(Optional) Resampling or aggregation method specific to this level. +If not defined, the method specified in the root `multiscales.resampling_method` field applies. +|=== + +// Group and from_group directly reference the data model structure itself. Path provide a clearer and more neutral way to describe these fields that keeps them referential without binding them to data model specific structures + +===== Example Representation + +Here is a JSON example that conforms to the **final `multiscales` schema**: + +```json +{ + "version": "1.0", + "resampling_method": "average", + "tile_matrix_ref": "OGC:WMT:1.0:WebMercatorQuad", + "layout": [ + { + "id": "L0", + "path": "L0" + }, + { + "id": "L1", + "path": "L1", + "derived_from": "L0", + "factors": [2, 2], + "resampling_method": "average" + }, + { + "id": "L2", + "path": "L2", + "derived_from": "L1", + "factors": [2, 2], + "resampling_method": "average" + } + ] +} +``` + +**Notes:** + +* Each `id` uniquely identifies an overview level. +* `path` points to the logical container for that level (may be omitted if it is a direct child of the `OverviewSet`). 
+* `derived_from` expresses lineage between levels. +* `factors` defines downscaling ratios. +* `resampling_method` can be defined per level or inherited from the global one. +* `tile_matrix_ref` provides context for external referencing. diff --git a/standard/template/sections/clause_7_unified_data_model.adoc b/standard/template/sections/clause_7_unified_data_model.adoc index 8af7598..531158f 100644 --- a/standard/template/sections/clause_7_unified_data_model.adoc +++ b/standard/template/sections/clause_7_unified_data_model.adoc @@ -202,74 +202,9 @@ Metadata may be declared at various levels within the model structure: All metadata follows harmonised naming and semantics consistent with the CDM and CF standards, enabling machine and human interpretability while supporting metadata exchange across diverse systems. -==== Overviews -The *Overviews* construct defines a formal, interoperable abstraction for multiscale gridded data. It ensures structural consistency across zoom levels and provides a semantic model for integration with tiled representations such as GeoTIFF overviews, OGC API – Tiles, and STAC Tiled Assets. +include::clause_7_part_overviews.adoc[] -===== Purpose - -The *Overviews* construct provides a general mechanism for associating a single logical data variable with a collection of resampled representations, referred to as *zoom levels*. Each zoom level holds a reduced-resolution version of the original variable, with progressively decreasing spatial resolution from the base (highest detail) to the coarsest level. - -Overviews enable: - -- Fast access to summary representations for visualisation -- Progressive transmission and downsampling -- Multi-resolution analytics and adaptive processing - -===== Conceptual Structure - -An *Overviews* construct is defined as a *hierarchical set of multiscale representations* of one or more data variables. 
It comprises the following components: - -[horizontal] -*Base Variable*:: The original, highest-resolution variable to which the overview hierarchy is anchored. It is defined using the standard `DataVariable` structure in the model. -*Overview Levels*:: A sequence of variables representing the same logical quantity as the base variable, but sampled at coarser spatial resolutions. -*Zoom Level Identifier*:: A unique identifier associated with each level, ordered from finest (e.g. `"0"`) to coarsest resolution (e.g. `"N"`). -*Tile Grid Definition*:: A mapping that associates each zoom level with a spatial tiling layout, defined in alignment with a `TileMatrixSet`. -*Spatial Alignment*:: Each overview variable MUST be spatially aligned with the base variable using a consistent coordinate reference system and compatible axis orientation. -*Resampling Method*:: A declared method indicating the technique used to derive coarser levels from the base variable (e.g. `nearest`, `average`, `cubic`). - -===== Model Components - -The *Overviews* construct is represented in the unified data model using the following logical elements: - -[cols="1,3"] -|=== -|Element |Definition - -|`OverviewSet` | A logical grouping of variables at multiple zoom levels associated with a single base variable. - -|`OverviewLevel` | A single resampled variable at a specific resolution, identified by a zoom level string. - -|`TileMatrixSetRef` | A reference to the tile grid specification applied across all overview levels. May refer to a well-known identifier, a URI, or an inline object. - -|`TileMatrixLimits` | (Optional) Constraints on the tile coverage per zoom level. - -|`resampling_method` | A string indicating the uniform method used to downsample data across all levels. -|=== - -All overview levels MUST preserve: - -- The data variable’s semantic identity (`standard_name`, `units`, etc.) 
-- The coordinate reference system -- The axis order and dimension semantics - -Only the resolution and extent (through tiling and shape) may differ across levels. - -===== Relationship to Tile Matrix Set - -The *Overviews* construct is structurally aligned with the OGC Tile Matrix Set concept. Each zoom level is mapped to a `TileMatrix`, and the chunk layout for the corresponding data variable SHALL match the tile grid’s `tileWidth` and `tileHeight`. - -The `OverviewSet` MAY constrain tile matrix limits using `TileMatrixSetLimits`, which restrict tile indices to actual data coverage, consistent with the spatial extent of the overview variable. - -===== Usage Context - -The *Overviews* construct is applicable to any gridded data variable with at least two spatial dimensions. It is primarily designed for: - -- Raster imagery (e.g. reflectance, temperature) -- Data cubes with spatial slices (e.g. time-series of spatial grids) -- Multi-band products with consistent spatial structure across levels - -The structure may be extended for N-dimensional datasets in future revisions, provided that two spatial axes can be unambiguously identified. === Conformance and Extensibility diff --git a/standard/template/sections/clause_9_zarr_encoding_overviews.adoc b/standard/template/sections/clause_9_zarr_encoding_overviews.adoc index b20092e..3e74154 100644 --- a/standard/template/sections/clause_9_zarr_encoding_overviews.adoc +++ b/standard/template/sections/clause_9_zarr_encoding_overviews.adoc @@ -1,47 +1,25 @@ === Encoding of Multiscale Overviews in Zarr -This clause specifies how multiscale tiling (also known as overviews or pyramids) is encoded in Zarr-based datasets conforming to the unified data model. The encoding supports both Zarr Version 2 and Version 3 and is aligned with the OGC Two Dimensional Tile Matrix Set Standard. +This clause specifies how Overviews should be encoded in Zarr-based datasets conforming to the GeoZarr data model. 
-Multiscale datasets are composed of a set of Zarr groups representing multiple zoom levels. Each level stores coarser-resolution resampled versions of the original data variables. +The *Overviews* construct follows the same encoding principles defined for the Common Data Model (CDM) when represented in Zarr. +Because overviews are defined purely at the CDM level through groups, variables, and attributes, no Zarr-specific structural extensions are introduced. -==== Hierarchical Layout +Each *OverviewSet* is encoded as a Zarr *group*, containing multiple *OverviewLevel* subgroups. +The parent group includes the `multiscales` attribute, which declares the hierarchy and relationships between levels. +Each *OverviewLevel* is implemented as a standard Zarr group containing variables (arrays) and coordinate variables, encoded identically to other CDM variables. -Each zoom level SHALL be represented as a Zarr group, identified by the Tile Matrix identifier (e.g., `"0"`, `"1"`, `"2"`). These groups SHALL be organised hierarchically under a common multiscale root group. Each zoom-level group SHALL contain the complete set of variables (Zarr arrays) corresponding to that resolution. +==== Relationship to Core CDM Encoding -[cols="1,2,2"] -|=== -|Structure |Zarr v2 |Zarr v3 +The encoding of overviews reuses the same mapping rules established in the core data model encoding: -|Zoom level groups | Subdirectories with `.zgroup` and `.zattrs` | Subdirectories with `zarr.json`, `node_type: group` +- **Groups** map to Zarr directories with `.zgroup`/`.zattrs` (Zarr v2) or `zarr.json` (Zarr v3, with `"node_type": "group"`). +- **Variables** map to Zarr arrays with `.zarray` and `.zattrs` (Zarr v2) or `"node_type": "array"` entries in `zarr.json` (Zarr v3). +- **Attributes** (including `multiscales`) are stored in `.zattrs` (Zarr v2) or under the `"attributes"` field in `zarr.json` (Zarr v3). 
-|Variables at each level | Zarr arrays (`.zarray`, `.zattrs`) in each group | Zarr arrays (`zarr.json`, `node_type: array`) in each group +==== Example Encoding (Zarr v3) -|Global metadata | `multiscales` defined in parent `.zattrs` | `multiscales` defined in parent group `zarr.json` under `attributes` -|=== - -Each multiscale group MUST define chunking (tiling) along the spatial dimensions (`X`, `Y`, or `lon`, `lat`). Recommended chunk sizes are 256×256 or 512×512. - -==== Metadata Encoding - -Multiscale metadata SHALL be defined using a `multiscales` attribute located in the parent group of the zoom levels. This attribute SHALL be a JSON object with the following members: - -- `tile_matrix_set` – Identifier, URI, or inline JSON object compliant with OGC TileMatrixSet v2 -- `resampling_method` – One of the standard string values (e.g., `"nearest"`, `"average"`) -- `tile_matrix_set_limits` – (optional) Zoom-level limits following the STAC Tiled Asset style - -===== Zarr v2 Encoding Example (`.zattrs`) -[source,json] ----- -{ - "multiscales": { - "tile_matrix_set": "WebMercatorQuad", - "resampling_method": "nearest" - } -} ----- - -===== Zarr v3 Encoding Example (`zarr.json`) [source,json] ---- { @@ -49,53 +27,46 @@ Multiscale metadata SHALL be defined using a `multiscales` attribute located in "node_type": "group", "attributes": { "multiscales": { - "tile_matrix_set": "WebMercatorQuad", - "resampling_method": "nearest" - } - } -} ----- - -==== Tile Matrix Set Representation - -The `tile_matrix_set` member MAY take one of the following forms: - -- A string referring to a well-known identifier (e.g., `"WebMercatorQuad"`) -- A URI pointing to a JSON document describing the tile matrix set -- An inline JSON object (CamelCase, OGC TMS 2.0 compatible) - -Zoom level identifiers in the tile matrix set MUST match the names of the child groups. 
The spatial reference system declared in `supportedCRS` MUST match the one declared in the corresponding `grid_mapping` of the data variables. - -==== Chunk Layout Alignment - -At each zoom level, chunking SHALL match the tile layout defined by the TileMatrix: - -- Chunks MUST be aligned with the tile grid (1:1 mapping between chunks and tiles) -- Chunk sizes MUST match the `tileWidth` and `tileHeight` declared in the TileMatrix -- Spatial dimensions MUST be clearly identified using `dimension_names` (v3) or `_ARRAY_DIMENSIONS` (v2) - -==== Tile Matrix Set Limits - -The `tile_matrix_set_limits` object MAY define the extent of actual data coverage for each zoom level. This follows the style of the STAC tiled-assets extension rather than the full OGC JSON encoding. - -Example: -[source,json] ----- -"tile_matrix_set_limits": { - "1": { - "min_tile_col": 0, - "max_tile_col": 1, - "min_tile_row": 0, - "max_tile_row": 1 + "version": "1.0", + "resampling_method": "average", + "layout": [ + {"id": "L0", "path": "L0"}, + {"id": "L1", "path": "L1", "derived_from": "L0", "factors": [2, 2]}, + {"id": "L2", "path": "L2", "derived_from": "L1", "factors": [2, 2]} + ] + }, + "spatial_ref": "EPSG:32633" + }, + "metadata": { + "title": "Reflectance Multiscale Example" } } ---- -==== Resampling Method - -The `resampling_method` MUST indicate the method used for downsampling across zoom levels. The value MUST be one of: - -`nearest`, `average`, `bilinear`, `cubic`, `cubic_spline`, `lanczos`, `mode`, `max`, `min`, `med`, `sum`, `q1`, `q3`, `rms`, `gauss` - -The same method MUST apply across all levels. +Child groups represent the overview levels: + +``` + +reflectance/ +├── zarr.json # OverviewSet metadata (includes "multiscales") +├── L0/ +│ ├── zarr.json # Highest or nominal resolution +│ ├── b01/ +│ ├── b02/ +│ ├── x/ +│ └── y/ +├── L1/ +│ ├── zarr.json +│ ├── b01/ +│ ├── b02/ +│ ├── x/ +│ └── y/ +└── L2/ +    ├── zarr.json +    ├── b01/ +    ├── b02/ +    ├── x/ +    └── y/ + +```