From c7b186c94f95a1772d60e0c6aa0a33f6ff1329e8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 30 Aug 2024 16:35:05 +0200 Subject: [PATCH 01/15] Redefined collection-level metadata #13 --- core/README.md | 32 +++------- core/schema/schema.yaml | 10 +++ geojson/README.md | 22 +++---- .../examples/featurecollection/features.json | 22 +++---- .../examples/individual-features/12324.json | 13 ++-- .../examples/individual-features/2713.json | 13 ++-- .../individual-features/collection.json | 62 ------------------- geoparquet/README.md | 8 +-- 8 files changed, 52 insertions(+), 130 deletions(-) delete mode 100644 geojson/examples/individual-features/collection.json diff --git a/core/README.md b/core/README.md index bb6259b..4d16c5f 100644 --- a/core/README.md +++ b/core/README.md @@ -23,31 +23,15 @@ This allows to define a clear mapping between the core specification and its enc - [Data types](https://github.com/fiboa/schema/blob/v0.1.0/datatypes.md) - [Vocabulary](https://github.com/fiboa/schema/blob/v0.1.0/README.md#vocabulary) -## Collection - -Collection-level metadata must be provided in an object that contains the properties below. -The invidiual encodings may decide to embed the collection or make it available separately. - -### Properties +## General Properties | Property Name | Data Type | Description | | ---------------- | -------------- | ----------- | -| fiboa_version | string | **REQUIRED.** Version number of the fiboa specification this entity implements. | -| fiboa_extensions | array\ | A list of URIs to extensions this entity implements. | - -Generally, the version and the extensions must be uniform per Collection. - -Other properties are also allowed to be provided, but are not described by this specification. - -## Features - -### General Properties - -| Property Name | Data Type | Description | -| ------------- | -------------- | ----------- | -| id | string | **REQUIRED.** A unique identifier for the field. 
It must be unique within the [Collection](#collection). | -| collection | string | The identifier of the parent collection. | -| category | array\ | A set of categories the field boundary belongs to. | +| fiboa_version | string | **REQUIRED.** Version number of the fiboa specification the collection implements. | +| fiboa_extensions | array\ | A list of URIs to extensions the collection implements. | +| id | string | **REQUIRED.** A unique identifier for the field. It must be unique per collection, i.e. `collection` and `id` form a unique identifier. | +| collection | string | The identifier of the parent collection. | +| category | array\ | A set of categories the field boundary belongs to. | **collection:** The collection identifier is usually only needed for merged datasets. @@ -65,7 +49,7 @@ Other properties are also allowed to be provided, but are not described by this The categories are based on the [definitions of the AgGateway initiative](https://aggateway.org/Portals/1010/WebSite/About%20Us/FIELD%20BOUNDARY%20FLYER%20122123.pdf?ver=2024-01-03-212959-590). -### Spatial Properties +## Spatial Properties | Property Name | Data Type | Description | | ------------- | ------------ | ----------- | @@ -79,7 +63,7 @@ and must match the geometry's area/perimeter. If they do not match then the geometry should be considered canonical. Validators may flag the value as invalid if it exceeds a certain threshold. 
-### Determination Properties +## Determination Properties | Property Name | Data Type | Description | | ---------------------- | --------- | ------------------------------------------------------------ | diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index 6dbd5b1..e652951 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -1,8 +1,18 @@ $schema: https://fiboa.github.io/schema/v0.1.0/schema.json required: + - fiboa_version - id - geometry properties: + fiboa_version: + type: string + enum: + - 0.2.0 + fiboa_extensions: + type: array + items: + type: string + format: uri id: type: string minLength: 1 diff --git a/geojson/README.md b/geojson/README.md index a24ff36..bcff673 100644 --- a/geojson/README.md +++ b/geojson/README.md @@ -12,16 +12,14 @@ The generic GeoJSON format is defined in 2. [as individual Features with a dedicated Collection](examples/individual-features/) - **[Datatype mapping](datatypes.md)** -## Collection +## FeatureCollection -A [fiboa Collection](../core/README.md#collection) must be provided as a JSON object, either -1. embedded into the GeoJSON in a top-level property named `fiboa` (see example 1), or -2. separately as a JSON file that is linked to from the GeoJSON (see example 2). - -A fiboa Collection may be a [GeoJSON FeatureCollection](https://datatracker.ietf.org/doc/html/rfc7946#section-3.3). +A FeatureCollection may have a top-level property named `fiboa`. +If present, it contains all properties that are common across the features. +In validation they must be copied to the `properties` in each Feature. All features in a FeatureCollection must be fiboa-compliant. -## Features +## Feature Each [fiboa Feature](../core/README.md#features) must be a valid [GeoJSON Feature](https://datatracker.ietf.org/doc/html/rfc7946#section-3.2). 
@@ -30,13 +28,12 @@ The following properties are defined for a GeoJSON Feature (at the top-level of | Property Name | Data Type | Description | | ------------- | ------------------- | ------------------------------------------------------------ | -| id | string | **REQUIRED. ** See [id](../core/README.md#general-properties) in the core specification, must not be a `number` | -| type | string | **REQUIRED. ** The GeoJSON type, must be: `Feature` | +| id | string | **REQUIRED.** See [id](../core/README.md#general-properties) in the core specification, must not be a `number` | +| type | string | **REQUIRED.** The GeoJSON type, must be: `Feature` | | geometry | object | **REQUIRED.** A [GeoJSON Geometry Object](https://datatracker.ietf.org/doc/html/rfc7946#section-3.1), must not be `null` | | bbox | array\ | A [GeoJSON Bounding Box](https://datatracker.ietf.org/doc/html/rfc7946#section-5) | -| properties | object | An object with additional properties (see [`properties`](#properties)) | +| properties | object | An object with all additional properties (see [`properties`](#properties)) | | links | array\ | A list of links (see [`links`](#links)) | -| fiboa | object | An object with the [fiboa Collection](../core/README.md#collection) properties if not provided as a link (see [Collection](#collection)). | > [!IMPORTANT] > RFC 7946 doesn't support a property named `crs`, which was only available in an earlier version of GeoJSON (2008). @@ -45,7 +42,7 @@ The following properties are defined for a GeoJSON Feature (at the top-level of ### `properties` -Must include any property that is required by the fiboa core specification (currently none). +Must include any property that is required by the fiboa core specification (currently `fiboa_version`). May include any additional property. All properties defined by the core specification (except for `id`, `geometry` and `bbox`) or extensions should be provided here. 
@@ -59,4 +56,3 @@ defined in The following relation types are commonly used: - `self`: Absolute link to the GeoJSON file itself. -- `collection`: Link to the [Collection](#collection) diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index ab48994..c7041de 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,4 +1,14 @@ { + "fiboa": { + "fiboa_version": "0.2.0", + "fiboa_extensions": [ + "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + ], + "collection": "de_nrw", + "license": "dl-de/by-2-0", + "attribution": "Land Nordrhein-Westfalen / Open.NRW - https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/" + }, "type": "FeatureCollection", "features": [ { @@ -151,15 +161,5 @@ 51.92614034991495 ] } - ], - "fiboa": { - "fiboa_version": "0.2.0", - "fiboa_extensions": [ - "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml" - ], - "id": "de_nrw", - "title": "Field boundaries for North Rhine-Westphalia (NRW), Germany", - "license": "dl-de/by-2-0", - "attribution": "Land Nordrhein-Westfalen / Open.NRW - https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/" - } + ] } \ No newline at end of file diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index 6402e59..5752024 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -2,6 +2,10 @@ "id": "12324", "type": "Feature", "properties": { + "fiboa_version": "0.2.0", + "fiboa_extensions": [ + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + ], "flik": "DENWLI0542130247", "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A", @@ -58,12 +62,5 @@ ] ] ] - }, - "links": [ - { - "href": "collection.json", - "rel": 
"collection", - "type": "application/json" - } - ] + } } \ No newline at end of file diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index 7c917ce..57e1559 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -2,6 +2,10 @@ "id": "2713", "type": "Feature", "properties": { + "fiboa_version": "0.2.0", + "fiboa_extensions": [ + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + ], "flik": "DENWLI0540210084", "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A", @@ -70,12 +74,5 @@ ] ] ] - }, - "links": [ - { - "href": "collection.json", - "rel": "collection", - "type": "application/json" - } - ] + } } \ No newline at end of file diff --git a/geojson/examples/individual-features/collection.json b/geojson/examples/individual-features/collection.json deleted file mode 100644 index 210bda1..0000000 --- a/geojson/examples/individual-features/collection.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "fiboa_version": "0.2.0", - "fiboa_extensions": [], - "stac_version": "1.0.0", - "type": "Collection", - "id": "de_nrw", - "title": "Field boundaries for North Rhine-Westphalia (NRW), Germany", - "description": "A field block (German: \"Feldblock\") is a contiguous agricultural area surrounded by permanent boundaries, which is cultivated by one or more farmers with one or more crops, is fully or partially set aside or is fully or partially taken out of production. Field blocks are classified separately according to the main land uses of arable land, grassland, permanent crops, 2nd pillar and other. 
Since 2005, field blocks in NRW have represented the area reference within the framework of the Integrated Administration and Control System (IACS) for EU agricultural subsidies.", - "license": "proprietary", - "providers": [ - { - "name": "Land Nordrhein-Westfalen / Open.NRW", - "roles": [ - "producer", - "licensor" - ], - "url": "https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/" - }, - { - "name": "fiboa CLI", - "roles": [ - "processor" - ], - "url": "https://pypi.org/project/fiboa-cli" - }, - { - "name": "Source Cooperative", - "roles": [ - "host" - ], - "url": "https://beta.source.coop/fiboa/de-nrw/" - } - ], - "extent": { - "spatial": { - "bbox": [ - [ - 5.8659988131, - 50.3226989435, - 9.4476584861, - 52.5310351488 - ] - ] - }, - "temporal": { - "interval": [ - [ - "2005-02-28T00:00:00Z", - "2024-03-28T00:00:00Z" - ] - ] - } - }, - "links": [ - { - "href": "https://www.govdata.de/dl-de/by-2-0", - "title": "Data licence Germany - attribution - Version 2.0", - "type": "text/html", - "rel": "license" - } - ] -} \ No newline at end of file diff --git a/geoparquet/README.md b/geoparquet/README.md index c2fcf39..3ecad3b 100644 --- a/geoparquet/README.md +++ b/geoparquet/README.md @@ -1,8 +1,9 @@ # GeoParquet Encoding Specification The Geoparquet encoding defines how field boundaries compliant to fiboa must be published. -The generic GeoParquet format is defined in the -[OGC GeoParquet specification v1.0.0](https://geoparquet.org/releases/v1.0.0/). +The generic GeoParquet format is defined in the OGC GeoParquet specification, +either version [v1.0.0](https://geoparquet.org/releases/v1.0.0/) +or [v1.1.0](https://geoparquet.org/releases/v1.1.0/). We aim to support any future version of GeoParquet, too. > [!NOTE] @@ -13,7 +14,7 @@ We aim to support any future version of GeoParquet, too. 
## Collection -The GeoParquet file must embed the [fiboa Collection](../core/README.md#collection) +The GeoParquet file must embed the collectio-level metadata in the Parquet metadata in a property named `fiboa`. It is recommended to additionally provide the fiboa Collection as a separate JSON file, too. @@ -30,4 +31,3 @@ i.e. the column can be missing from the GeoParquet file. The mapping between the Parquet data types and the fiboa data types, can be found in the [data type mapping](datatypes.md). - From 8d5b796f092df149307f7b75e7e36fb22ec1a5d8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 4 Feb 2025 23:00:18 +0100 Subject: [PATCH 02/15] Migrate fiboa_extensions and fiboa_version to fiboa_schemas --- CHANGELOG.md | 6 ++++-- core/README.md | 17 ++++++++++------- core/schema/schema.yaml | 11 +++++------ .../examples/featurecollection/features.json | 2 +- geojson/examples/individual-features/12324.json | 4 ++-- geojson/examples/individual-features/2713.json | 2 +- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07ee2b6..75ae0a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed -- ... +- Switched from v0.1.0 to v0.2.0 of the schema language +- Renamed `fiboa_extensions` to `fiboa_schemas` ### Deprecated @@ -22,7 +23,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Removed -- Value `administrative` was removed from `determination_method` in favour of the new property `category` +- Value `administrative` was removed from `determination_method` in favor of the new property `category` +- `fiboa_version` in favor of adding the schema URL of the specification to `fiboa_schemas`. 
### Fixed diff --git a/core/README.md b/core/README.md index 4d16c5f..55fac17 100644 --- a/core/README.md +++ b/core/README.md @@ -25,13 +25,16 @@ This allows to define a clear mapping between the core specification and its enc ## General Properties -| Property Name | Data Type | Description | -| ---------------- | -------------- | ----------- | -| fiboa_version | string | **REQUIRED.** Version number of the fiboa specification the collection implements. | -| fiboa_extensions | array\ | A list of URIs to extensions the collection implements. | -| id | string | **REQUIRED.** A unique identifier for the field. It must be unique per collection, i.e. `collection` and `id` form a unique identifier. | -| collection | string | The identifier of the parent collection. | -| category | array\ | A set of categories the field boundary belongs to. | +| Property Name | Data Type | Description | +| ------------- | -------------- | ----------- | +| fiboa_schemas | array\ | **REQUIRED.** A list of URLs to schemas the collection implements. | +| id | string | **REQUIRED.** A unique identifier for the field. It must be unique per collection, i.e. `collection` and `id` form a unique identifier. | +| collection | string | The identifier of the parent collection. | +| category | array\ | A set of categories the field boundary belongs to. | + +**fiboa_schemas:** The schemas the collection implements. Must be URLs to the schema YAML files. + +The schema for this specification (see above) is required to be provided. **collection:** The collection identifier is usually only needed for merged datasets. 
diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index e652951..af6de93 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -1,18 +1,17 @@ $schema: https://fiboa.github.io/schema/v0.1.0/schema.json required: - - fiboa_version + - fiboa_schemas - id - geometry properties: - fiboa_version: - type: string - enum: - - 0.2.0 - fiboa_extensions: + fiboa_schemas: type: array items: type: string format: uri + contains: + enum: + - https://fiboa.github.io/specification/v0.2.0/schema.yaml id: type: string minLength: 1 diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index c7041de..a656fa7 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,7 +1,7 @@ { "fiboa": { - "fiboa_version": "0.2.0", "fiboa_extensions": [ + "https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index 5752024..8db0f44 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -2,8 +2,8 @@ "id": "12324", "type": "Feature", "properties": { - "fiboa_version": "0.2.0", - "fiboa_extensions": [ + "fiboa_schemas": [ + "https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], "flik": "DENWLI0542130247", diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index 57e1559..ce5aab7 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -2,8 +2,8 @@ "id": "2713", "type": "Feature", "properties": { - "fiboa_version": "0.2.0", "fiboa_extensions": [ + 
"https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], "flik": "DENWLI0540210084", From 89bef1cf19e4d3c3008eb764679efd4fcf2b82cc Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Sun, 9 Mar 2025 14:06:32 +0100 Subject: [PATCH 03/15] rename: fiboa_schemas => schemas --- CHANGELOG.md | 4 ++-- core/README.md | 4 ++-- core/schema/schema.yaml | 4 ++-- geojson/examples/individual-features/12324.json | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75ae0a6..93198bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Switched from v0.1.0 to v0.2.0 of the schema language -- Renamed `fiboa_extensions` to `fiboa_schemas` +- Renamed `fiboa_extensions` to `schemas` ### Deprecated @@ -24,7 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Removed - Value `administrative` was removed from `determination_method` in favor of the new property `category` -- `fiboa_version` in favor of adding the schema URL of the specification to `fiboa_schemas`. +- `fiboa_version` in favor of adding the schema URL of the specification to `schemas`. ### Fixed diff --git a/core/README.md b/core/README.md index 55fac17..25d8adc 100644 --- a/core/README.md +++ b/core/README.md @@ -27,12 +27,12 @@ This allows to define a clear mapping between the core specification and its enc | Property Name | Data Type | Description | | ------------- | -------------- | ----------- | -| fiboa_schemas | array\ | **REQUIRED.** A list of URLs to schemas the collection implements. | +| schemas | array\ | **REQUIRED.** A list of URLs to schemas the collection implements. | | id | string | **REQUIRED.** A unique identifier for the field. It must be unique per collection, i.e. `collection` and `id` form a unique identifier. 
| | collection | string | The identifier of the parent collection. | | category | array\ | A set of categories the field boundary belongs to. | -**fiboa_schemas:** The schemas the collection implements. Must be URLs to the schema YAML files. +**schemas:** The schemas the collection implements. Must be URLs to the schema YAML files. The schema for this specification (see above) is required to be provided. diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index af6de93..f9938a1 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -1,10 +1,10 @@ $schema: https://fiboa.github.io/schema/v0.1.0/schema.json required: - - fiboa_schemas + - schemas - id - geometry properties: - fiboa_schemas: + schemas: type: array items: type: string diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index 8db0f44..fd9951d 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -2,7 +2,7 @@ "id": "12324", "type": "Feature", "properties": { - "fiboa_schemas": [ + "schemas": [ "https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], From 02576c65777683a41616f569bc3eafa7345da961 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Sun, 9 Mar 2025 17:59:20 +0100 Subject: [PATCH 04/15] Fixes --- core/README.md | 6 +++--- core/schema/schema.yaml | 2 +- geoparquet/README.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/README.md b/core/README.md index 25d8adc..c900a48 100644 --- a/core/README.md +++ b/core/README.md @@ -14,14 +14,14 @@ Collection and Feature level. ## Schema The data types in the following document are defined in -[fiboa Schema](https://github.com/fiboa/schema), v0.1.0. +[fiboa Schema](https://github.com/fiboa/schema), v0.2.0. 
fiboa Schema defines a (limited) set of data types and a vocabulary to express additional constraints for these data types. This allows to define a clear mapping between the core specification and its encodings. -- [Data types](https://github.com/fiboa/schema/blob/v0.1.0/datatypes.md) -- [Vocabulary](https://github.com/fiboa/schema/blob/v0.1.0/README.md#vocabulary) +- [Data types](https://github.com/fiboa/schema/blob/v0.2.0/datatypes.md) +- [Vocabulary](https://github.com/fiboa/schema/blob/v0.2.0/README.md#vocabulary) ## General Properties diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index f9938a1..cf7bcd3 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -1,4 +1,4 @@ -$schema: https://fiboa.github.io/schema/v0.1.0/schema.json +$schema: https://fiboa.github.io/schema/v0.2.0/schema.json required: - schemas - id diff --git a/geoparquet/README.md b/geoparquet/README.md index 3ecad3b..88bf56d 100644 --- a/geoparquet/README.md +++ b/geoparquet/README.md @@ -14,7 +14,7 @@ We aim to support any future version of GeoParquet, too. ## Collection -The GeoParquet file must embed the collectio-level metadata +The GeoParquet file must embed the collection-level metadata in the Parquet metadata in a property named `fiboa`. It is recommended to additionally provide the fiboa Collection as a separate JSON file, too. 
From d49b8893670aa99d71c875593512656192ff612b Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 10 Mar 2025 21:37:34 +0100 Subject: [PATCH 05/15] Collection-level updates --- CHANGELOG.md | 2 + best-practices/README.md | 7 +++ core/README.md | 14 +++++- core/schema/schema.yaml | 1 + geojson/README.md | 15 ++++-- geojson/datatypes.md | 44 +++++++++--------- .../examples/featurecollection/features.json | 15 +++--- .../examples/individual-features/2713.json | 2 +- geoparquet/README.md | 7 ++- geoparquet/datatypes.md | 46 +++++++++---------- 10 files changed, 92 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93198bd..2eae19f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Property `category` - Property `determination_details` +- Information about the encoding of datatypes at the collection-level ### Changed - Switched from v0.1.0 to v0.2.0 of the schema language - Renamed `fiboa_extensions` to `schemas` +- GeoJSON: Switched `contentEncoding` for data type `binary` from `binary` to `base64` ### Deprecated diff --git a/best-practices/README.md b/best-practices/README.md index 3ba9c78..9632de9 100644 --- a/best-practices/README.md +++ b/best-practices/README.md @@ -7,3 +7,10 @@ All properties should be using snake case. For example a field for a land-use class should be named `landuse_class` instead of `landuseClass`. + +## Extension prefixes + +All properties in an extension should have a common prefix. +Extensions commonly use the colon (`:`) as separator between prefix and property name, e.g. `crop:name`. +A single underscore (`_`) should be avoided to prevent conflicts with other property names (see [Casing](#casing)). +Nevertheless, the separator can be chosen freely by extension authors. 
diff --git a/core/README.md b/core/README.md index c900a48..2ef2224 100644 --- a/core/README.md +++ b/core/README.md @@ -23,6 +23,16 @@ This allows to define a clear mapping between the core specification and its enc - [Data types](https://github.com/fiboa/schema/blob/v0.2.0/datatypes.md) - [Vocabulary](https://github.com/fiboa/schema/blob/v0.2.0/README.md#vocabulary) +## Collections + +A Collection is a group of one or more features with a unique identifier (see property `collection`). + +Each collection must have a single set of applicable schemas. + +Any property that consists of the same value across all features can be de-duplicated to the collection-level +if more than two features are available for the collection. +The specific location and behaviour of collection-level data is specified in the encoding-specific specifications. + ## General Properties | Property Name | Data Type | Description | @@ -33,10 +43,10 @@ This allows to define a clear mapping between the core specification and its enc | category | array\ | A set of categories the field boundary belongs to. | **schemas:** The schemas the collection implements. Must be URLs to the schema YAML files. - The schema for this specification (see above) is required to be provided. -**collection:** The collection identifier is usually only needed for merged datasets. +**collection:** The collection identifier is usually only needed for merged datasets and it is **required** in this case. +A validator can't check whether the `collection` property is required; the data providers must ensure this. 
**category:** Choose any (unique) combination of the following values: diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index cf7bcd3..eddc195 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -10,6 +10,7 @@ properties: type: string format: uri contains: + type: string enum: - https://fiboa.github.io/specification/v0.2.0/schema.yaml id: diff --git a/geojson/README.md b/geojson/README.md index bcff673..80946d0 100644 --- a/geojson/README.md +++ b/geojson/README.md @@ -14,11 +14,18 @@ The generic GeoJSON format is defined in ## FeatureCollection -A FeatureCollection may have a top-level property named `fiboa`. -If present, it contains all properties that are common across the features. -In validation they must be copied to the `properties` in each Feature. +A FeatureCollection may have a top-level property named `fiboa` to contain all collection-level data. +If present, it contains all properties that are common across the features +and the features shall not contain those properties. +Validation must ensure that the collection-level properties are taken into account. All features in a FeatureCollection must be fiboa-compliant. +The following properties can't be collection-level properties: + +- `id` +- `geometry` +- `bbox` + ## Feature Each [fiboa Feature](../core/README.md#features) must be a valid @@ -42,7 +49,7 @@ The following properties are defined for a GeoJSON Feature (at the top-level of ### `properties` -Must include any property that is required by the fiboa core specification (currently `fiboa_version`). +Must include any property that is required by the fiboa core specification. May include any additional property. All properties defined by the core specification (except for `id`, `geometry` and `bbox`) or extensions should be provided here. 
diff --git a/geojson/datatypes.md b/geojson/datatypes.md index 5a747de..661183e 100644 --- a/geojson/datatypes.md +++ b/geojson/datatypes.md @@ -3,28 +3,28 @@ The following table shows the data types that are used by fiboa in the Property definitions. It also shows the mapping to the GeoJSON data types. -| fiboa data type | (Geo)JSON | -| --------------------------------------------------- | ------------------------------------------------------------ | -| boolean | boolean | -| int8 | integer
minimum: -128
maximum: 127 | -| uint8 | integer
minimum: 0
maximum: 255 | -| int16 | integer
minimum: -32768
maximum: 32767 | -| uint16 | integer
minimum: 0
maximum: 65535 | -| int32 | integer
minimum: -2147483648
maximum: 2147483647 | -| uint32 | integer
minimum: 0
maximum: 4294967295 | -| int64 | integer
minimum: -9223372036854775808
maximum: 9223372036854775807 | -| uint64 | integer
minimum: 0
maximum: 18446744073709551615 | -| float
IEEE 32-bit | number
minimum: ?
maximum: ? | -| double
IEEE 64-bit | number
minimum: ?
maximum: ? | -| binary | string
contentEncoding: binary | -| string
charset: UTF-8 | string | -| array | array | -| object
keys: string
values: any | object
additionalProperties: false | -| date | string
format: date | -| date-time
with milliseconds
timezone: UTC | string
format: date-time
pattern: Z$ | -| geometry | [object with schema](https://geojson.org/schema/Geometry.json) | -| bounding-box
x and y only, no z | array
minItems: 4
maxItems: 4
items: number | -| *required* (not a datatype) | null | +| fiboa data type | (Geo)JSON | Collection-level | +| --------------------------------------------------- | ------------------------------------------------------------ | ---------------- | +| boolean | boolean | yes | +| int8 | integer
minimum: -128
maximum: 127 | yes | +| uint8 | integer
minimum: 0
maximum: 255 | yes | +| int16 | integer
minimum: -32768
maximum: 32767 | yes | +| uint16 | integer
minimum: 0
maximum: 65535 | yes | +| int32 | integer
minimum: -2147483648
maximum: 2147483647 | yes | +| uint32 | integer
minimum: 0
maximum: 4294967295 | yes | +| int64 | integer
minimum: -9223372036854775808
maximum: 9223372036854775807 | yes | +| uint64 | integer
minimum: 0
maximum: 18446744073709551615 | yes | +| float
IEEE 32-bit | number
minimum: ?
maximum: ? | yes | +| double
IEEE 64-bit | number
minimum: ?
maximum: ? | yes | +| binary | string
contentEncoding: base64 | yes | +| string
charset: UTF-8 | string | yes | +| array | array | yes | +| object
keys: string
values: any | object
additionalProperties: false | yes | +| date | string
format: date | yes | +| date-time
with milliseconds
timezone: UTC | string
format: date-time
pattern: Z$ | yes | +| geometry | [object with schema](https://geojson.org/schema/Geometry.json) | no | +| bounding-box
x and y only, no z | array
minItems: 4
maxItems: 4
items: number | no | +| *if a property is not required* | null | yes | ## Potential issues in conversion diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index a656fa7..d3b7fd7 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,9 +1,10 @@ { "fiboa": { - "fiboa_extensions": [ + "schemas": [ "https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", - "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", + "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" ], "collection": "de_nrw", "license": "dl-de/by-2-0", @@ -18,8 +19,8 @@ "inspire:id": "https://geodaten.nrw.de/id/inspire-lc-fb/landcoverunit/12324", "flik": "DENWLI0542130247", "determination_datetime": "2005-02-28T00:00:00Z", - "nutz_code": "A", - "nutz_txt": "Ackerland", + "crop:code": "A", + "crop:name": "Ackerland", "area": 1.631100058555603 }, "geometry": { @@ -86,9 +87,9 @@ "properties": { "inspire:id": "https://geodaten.nrw.de/id/inspire-lc-fb/landcoverunit/2713", "flik": "DENWLI0540210084", - "determination_datetime": "2005-02-28T00:00:00Z", - "nutz_code": "A", - "nutz_txt": "Ackerland", + "determination_datetime": "2005-02-22T00:00:00Z", + "crop:code": "W", + "crop:name": "Weide", "area": 1.8975000381469727 }, "geometry": { diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index ce5aab7..329e3ed 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -2,7 +2,7 @@ "id": "2713", "type": "Feature", "properties": { - "fiboa_extensions": [ + "schemas": [ "https://fiboa.github.io/specification/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], diff --git a/geoparquet/README.md 
b/geoparquet/README.md index 88bf56d..55c75f1 100644 --- a/geoparquet/README.md +++ b/geoparquet/README.md @@ -17,8 +17,6 @@ We aim to support any future version of GeoParquet, too. The GeoParquet file must embed the collection-level metadata in the Parquet metadata in a property named `fiboa`. -It is recommended to additionally provide the fiboa Collection as a separate JSON file, too. - ## Features Each [fiboa Feature](../core/README.md#features) corresponds to a row in a GeoParquet file. @@ -31,3 +29,8 @@ i.e. the column can be missing from the GeoParquet file. The mapping between the Parquet data types and the fiboa data types, can be found in the [data type mapping](datatypes.md). + +## Best practices + +For data with a lot of repetition, brotli compression is recommended. +This applies particularly for merged datasets that don't deduplicate properties to the collection-level. diff --git a/geoparquet/datatypes.md b/geoparquet/datatypes.md index 403bbee..f999451 100644 --- a/geoparquet/datatypes.md +++ b/geoparquet/datatypes.md @@ -3,28 +3,28 @@ The following table shows the data types that are used by fiboa in the Property definitions. It also shows the mapping to the GeoParquet data types. -| fiboa Schema data type | (Geo)Parquet | -| --------------------------------------------------- | ------------------------------------------------------------ | -| boolean | BOOLEAN | -| int8 | IntType
bitWidth: 8
isSigned: true
(deprecated: INT_8) | -| uint8 | IntType
bitWidth: 8
isSigned: false
(deprecated: UINT_8) | -| int16 | IntType
bitWidth: 16
isSigned: true
(deprecated: INT_16) | -| uint16 | IntType
bitWidth: 16
isSigned: false
(deprecated: UINT_16) | -| int32 | IntType
bitWidth: 32
isSigned: true
(deprecated: INT_32) | -| uint32 | IntType
bitWidth: 64
isSigned: false
(deprecated: UINT_32) | -| int64 | IntType
bitWidth: 64
isSigned: true
(deprecated: INT_64) | -| uint64 | IntType
bitWidth: 64
isSigned: false
(deprecated: UINT_64) | -| float
IEEE 32-bit | FLOAT | -| double
IEEE 64-bit | DOUBLE | -| binary | BYTE_ARRAY | -| string
charset: UTF-8 | STRING (BYTE_ARRAY) | -| array | LIST | -| object
keys: string
values: any | STRUCT / MAP | -| date | DATE (INT32) | -| date-time
with milliseconds
timezone: UTC | TimestampType (INT64)
isAdjustedToUTC: true
unit: MILLIS
(deprecated: TIMESTAMP_MILLIS) | -| geometry | BYTE_ARRAY
encoded as WKB | -| bounding-box
x and y only, no z | STRUCT(xmin FLOAT, ymin FLOAT, xmax FLOAT, ymax FLOAT) | -| *if a field is not required* | [Nullity](https://parquet.apache.org/docs/file-format/nulls/) | +| fiboa Schema data type | (Geo)Parquet | Collection-level | +| --------------------------------------------------- | ------------------------------------------------------------ | ------------------------------- | +| boolean | BOOLEAN | yes | +| int8 | IntType
bitWidth: 8
isSigned: true
(deprecated: INT_8) | yes | +| uint8 | IntType
bitWidth: 8
isSigned: false
(deprecated: UINT_8) | yes | +| int16 | IntType
bitWidth: 16
isSigned: true
(deprecated: INT_16) | yes | +| uint16 | IntType
bitWidth: 16
isSigned: false
(deprecated: UINT_16) | yes | +| int32 | IntType
bitWidth: 32
isSigned: true
(deprecated: INT_32) | yes | +| uint32 | IntType
bitWidth: 32
isSigned: false
(deprecated: UINT_32) | yes | +| int64 | IntType
bitWidth: 64
isSigned: true
(deprecated: INT_64) | yes | +| uint64 | IntType
bitWidth: 64
isSigned: false
(deprecated: UINT_64) | yes | +| float
IEEE 32-bit | FLOAT | yes | +| double
IEEE 64-bit | DOUBLE | yes | +| binary | BYTE_ARRAY | as string, base64-encoded | +| string
charset: UTF-8 | STRING (BYTE_ARRAY) | yes | +| array | LIST | yes | +| object
keys: string
values: any | STRUCT / MAP | yes | +| date | DATE (INT32) | as string, compliant to ISO8601 | +| date-time
with milliseconds
timezone: UTC | TimestampType (INT64)
isAdjustedToUTC: true
unit: MILLIS
(deprecated: TIMESTAMP_MILLIS) | as string, compliant to ISO8601 | +| geometry | BYTE_ARRAY
encoded as WKB | no | +| bounding-box
x and y only, no z | STRUCT(xmin FLOAT, ymin FLOAT, xmax FLOAT, ymax FLOAT) | no | +| *if a property is not required* | [Nullity](https://parquet.apache.org/docs/file-format/nulls/) | yes | The integer data types and the data type string can also be mapped to the ENUM data type in Parquet if a pre-defined set of values is available. @@ -45,4 +45,4 @@ The following data types occur in Parquet, but are not currently supported in fi ## Potential issues in conversion -- The micro/nanosecond precision of Datetime / Times may got lost +- The micro/nanosecond precision of Datetime / Times may get lost From 6c1e5553ba2e14350e4abf00f3225fbb06899202 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 10 Mar 2025 21:41:23 +0100 Subject: [PATCH 06/15] Finer-grained CI, extend CONTRIBUTING --- .github/workflows/test.yaml | 27 +++++++++++++++++++++++++-- CONTRIBUTING.md | 3 +++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 43c1462..b7f9e3c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -3,7 +3,7 @@ on: - push - pull_request jobs: - deploy: + docs: runs-on: ubuntu-latest steps: - uses: actions/setup-python@v5 @@ -15,7 +15,30 @@ jobs: pip install pipenv pipenv install - run: pipenv run test-docs + schema: + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '>=3.9' + - uses: actions/checkout@v4 + - name: Install pipenv + run: | + pip install pipenv + pipenv install - run: pipenv run test-schema + examples: + runs-on: ubuntu-latest + needs: schema + steps: + - uses: actions/setup-python@v5 + with: + python-version: '>=3.9' + - uses: actions/checkout@v4 + - name: Install pipenv + run: | + pip install pipenv + pipenv install - run: pipenv run test-geojson-features - run: pipenv run test-geojson-collection - - run: pipenv run test-geoparquet \ No newline at end of file + - run: pipenv run test-geoparquet diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md index 936f0f0..f0bfd14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,6 +40,9 @@ We use pipenv to execute the tests. Start with the following command in the folder where this README is located: `pip install pipenv --user` +Install the dependencies for the test: +`pipenv install` + Finally, you can run the tests as follows: - To check the markdown run: `pipenv run test-docs` From ce4dcfdc8b54646d8a36286ae90c42b8cfc4d093 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 10 Mar 2025 22:29:51 +0100 Subject: [PATCH 07/15] CLarifications --- core/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/README.md b/core/README.md index 2ef2224..a03290b 100644 --- a/core/README.md +++ b/core/README.md @@ -33,6 +33,15 @@ Any property that consists of the same value across all features can be de-dupli if more than two features are available for the collection. The specific location and behaviour of collection-level data is specified in the encoding-specific specifications. +Example: + +You have two different field boundary datasets named `abc` (CC-0 licensed) and `xyz` (CC-BY-4.0 licensed). +If you store the datasets separately, you can store the license in the collection-level data +as the value for the property is the same for all features. +Once you merged the two datasets, you must ensure that a unique identifier for the collection is provided +(here: `abc` and `xyz`) so that IDs are unique. +Additionally, you have to add the license property on the feature-level as the licenses are now twofold. + ## General Properties | Property Name | Data Type | Description | @@ -46,7 +55,8 @@ The specific location and behaviour of collection-level data is specified in the The schema for this specification (see above) is required to be provided. **collection:** The collection identifier is usually only needed for merged datasets and it is **required** in this case. 
-A validatior can't check whether the `collection` property is required, the data providers must ensure this. +A validatior can't check whether the `collection` property is required, the data providers or tooling must ensure that if data from two different sources are merged that a `collection` property with distinct values is provided. +Otherwise, IDs may conflict or extension requirements might not be fulfilled and validation could fail. **category:** Choose any (unique) combination of the following values: From 82b536ba99ded0edfdb035e9911e9505beef3373 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 11 Mar 2025 23:58:55 +0100 Subject: [PATCH 08/15] Rewrote many parts, removed links, collection-level data as foreign members to FeatureCollection --- CHANGELOG.md | 7 +- core/README.md | 120 +++++++++------ core/schema/schema.yaml | 1 + geojson/README.md | 82 +++++----- .../examples/featurecollection/features.json | 145 ++++-------------- .../examples/individual-features/12324.json | 59 ++----- .../examples/individual-features/2713.json | 79 +++------- geoparquet/README.md | 25 ++- 8 files changed, 199 insertions(+), 319 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2eae19f..fadecb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,16 +17,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Switched from v0.1.0 to v0.2.0 of the schema language - Renamed `fiboa_extensions` to `schemas` +- Schemas must be valid HTTP(S) URLs - GeoJSON: Switched `contentEncoding` for data type `binary` from `binary` to `base64` - -### Deprecated - -- ... +- GeoJSON FeatureCollection: Collection-level data is provided at the top-level, not in a `fiboa` property ### Removed - Value `administrative` was removed from `determination_method` in favor of the new property `category` - `fiboa_version` in favor of adding the schema URL of the specification to `schemas`. 
+- GeoJSON Feature: `links` property ### Fixed diff --git a/core/README.md b/core/README.md index a03290b..3605deb 100644 --- a/core/README.md +++ b/core/README.md @@ -1,39 +1,66 @@ -# Core Specification +# Core Specification -This specification describes the core data and metadata properties for both at the -Collection and Feature level. +This specification describes the core data and metadata properties that describe a fiboa Feature. +The specification doesn't distinguish between collection-level and feature-level properties, +common definitions are shared across these levels. - A Collection refers to a group of one or more features. - A Feature is a single field geometry with additional properties. -> [!NOTE] -> The Core Specification is still work in progress. Feedback is welcome! - - **Schema:** -## Schema +## Table of Contents -The data types in the following document are defined in -[fiboa Schema](https://github.com/fiboa/schema), v0.2.0. +- [General Properties](#general-properties) + - [schemas](#schemas) + - [id](#id) + - [collection](#collection) + - [category](#category) +- [Spatial Properties](#spatial-properties) + - [area / perimeter](#area--perimeter) +- [Determination Properties](#determination-properties) + - [determination\_datetime](#determination_datetime) + - [determination\_method](#determination_method) +- [Schema Language](#schema-language) -fiboa Schema defines a (limited) set of data types and a vocabulary to express -additional constraints for these data types. -This allows to define a clear mapping between the core specification and its encodings. +## General Properties + +| Property Name | Data Type | Description | +| ------------- | -------------- | ----------- | +| schemas | array\ | **REQUIRED.** A list of schemas the collection implements. | +| id | string | **REQUIRED.** An identifier for the field. | +| collection | string | The identifier of the parent collection. 
| +| category | array\ | A set of categories the field boundary belongs to. | -- [Data types](https://github.com/fiboa/schema/blob/v0.2.0/datatypes.md) -- [Vocabulary](https://github.com/fiboa/schema/blob/v0.2.0/README.md#vocabulary) +### schemas -## Collections +The schemas the collection implements. +Each schema must be a valid HTTP(S) URL to an existing YAML file compliant to fiboa Schema. +The schema for this specification (see above) is required to be provided. + +Each `collection` must have a single set of applicable schemas. + +The schema URI listed above is required to be present in the `schemas` array. + +### id -A Collection is a group of one or more features with a unique identifier (see property `collection`). +It must be unique per collection, i.e. `collection` and `id` form a unique identifier. -Each collection must have a single set of applicable schemas. +### collection -Any property that consists of the same value across all features can be de-duplicated to the collection-level -if more than two features are available for the collection. +A collection is a group of one or more features with a unique identifier, stored in the `collection` property. + +The collection identifier is usually only needed for merged datasets and it is **required** in this case. +Implementations may create collection identifiers if datasets that don't provide a collection identifier are getting merged. +A validator can't know whether the `collection` property is required, the data providers or tooling must handle this, +i.e. if data from two different sources is merged, a `collection` property with distinct values must be provided. +This ensures unique IDs through the combination of the properties `id` and `collection`. + +Encodings may support storing properties that consist of the same value across all features at the collection-level. 
+This de-duplicates data for more efficient resource usage, but only applies if more than two features are available for the collection. The specific location and behaviour of collection-level data is specified in the encoding-specific specifications. -Example: +**Example:** You have two different field boundary datasets named `abc` (CC-0 licensed) and `xyz` (CC-BY-4.0 licensed). If you store the datasets separately, you can store the license in the collection-level data @@ -42,23 +69,9 @@ Once you merged the two datasets, you must ensure that a unique identifier for t (here: `abc` and `xyz`) so that IDs are unique. Additionally, you have to add the license property on the feature-level as the licenses are now twofold. -## General Properties - -| Property Name | Data Type | Description | -| ------------- | -------------- | ----------- | -| schemas | array\ | **REQUIRED.** A list of URLs to schemas the collection implements. | -| id | string | **REQUIRED.** A unique identifier for the field. It must be unique per collection, i.e. `collection` and `id` form a unique identifier. | -| collection | string | The identifier of the parent collection. | -| category | array\ | A set of categories the field boundary belongs to. | +### category -**schemas:** The schemas the collection implements. Must be URLs to the schema YAML files. -The schema for this specification (see above) is required to be provided. - -**collection:** The collection identifier is usually only needed for merged datasets and it is **required** in this case. -A validatior can't check whether the `collection` property is required, the data providers or tooling must ensure that if data from two different sources are merged that a `collection` property with distinct values is provided. -Otherwise, IDs may conflict or extension requirements might not be fulfilled and validation could fail. 
- -**category:** Choose any (unique) combination of the following values: +Choose any (unique) combination of the following values: - `conceptual`: This boundary represents how the grower thinks of a field, and what they would share with service providers to allocate information at the highest level of the field concept within their operation. @@ -81,21 +94,26 @@ The categories are based on the [definitions of the AgGateway initiative](https: | area | float | Area of the field, in hectares. Must be > 0 and <= 100,000. | | perimeter | float | Perimeter of the field, in meters. Must be > 0 and <= 125,000. | -**area/perimeter:** These are derived attributes from the geometry itself, +### area / perimeter + +These are derived attributes from the geometry itself, and must match the geometry's area/perimeter. If they do not match then the geometry should be considered canonical. Validators may flag the value as invalid if it exceeds a certain threshold. ## Determination Properties -| Property Name | Data Type | Description | -| ---------------------- | --------- | ------------------------------------------------------------ | -| determination_method | string | The boundary creation method, one of the values below. | -| determination_datetime | datetime | The last timestamp at which the field did exist and was observed, in UTC. | +| Property Name | Data Type | Description | +| ---------------------- | --------- | ----------- | +| determination_method | string | The boundary creation method, one of the values below. | +| determination_datetime | datetime | The last timestamp at which the field did exist and was observed. | | determination_details | string | Further details about the determination, especially the methodology. | -**determination_datetime**: In case the source of the information is an -interval or a set of timestamps, use the end. +### determination_datetime + +The last timestamp at which the field did exist and was observed, provided in the UTC timezone. 
+ +In case the source of the information is an interval or a set of timestamps, use the end. For example, for ML you'd use the timestamp of the last image and not the timestamp of the actual execution. @@ -103,7 +121,9 @@ timestamp of the actual execution. > We define more temporal properties in the > [timestamps extension](https://github.com/fiboa/timestamps). -**determination_method**: Must be one of the following values: +### determination_method + +The determination method must be one of the following values: - `manual`: Hand created from imagery, e.g. using a tool to point and click on a map. - `surveyed`: Determined through a professional land survey measuring the actual distances and angles on the ground. @@ -114,3 +134,15 @@ timestamp of the actual execution. The determination methods are based on the definitions of the [AgGateway initiative - WG17](https://aggateway.org/). The specific values have [not been published yet](https://github.com/fiboa/specification/issues/31). + +## Schema Language + +The schema language used for fiboa is [fiboa Schema](https://github.com/fiboa/schema), version 0.2.0. + +The data types in the tables above are defined in the document +[Data Types](https://github.com/fiboa/schema/blob/v0.2.0/datatypes.md). + +fiboa Schema defines a (limited) set of data types and a +[vocabulary](https://github.com/fiboa/schema/blob/v0.2.0/README.md#vocabulary) +to express additional constraints for these data types. +This allows to define a clear mapping between the core specification and its encodings. 
diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index eddc195..8886fc2 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -9,6 +9,7 @@ properties: items: type: string format: uri + pattern: ^https?:// contains: type: string enum: diff --git a/geojson/README.md b/geojson/README.md index 80946d0..55f04d1 100644 --- a/geojson/README.md +++ b/geojson/README.md @@ -1,65 +1,73 @@ # GeoJSON Encoding Specification -The GeoJSON encoding defines how field boundaries compliant to fiboa must be published. -The generic GeoJSON format is defined in -[IETF RFC 7946](https://datatracker.ietf.org/doc/html/rfc7946). +The GeoJSON encoding defines how to encode field boundaries compliant to fiboa as +GeoJSON as defined in [IETF RFC7946](https://datatracker.ietf.org/doc/html/rfc7946). -> [!NOTE] -> The GeoJSON encoding is still work in progress. Feedback is welcome! +A single fiboa Feature must be encoded as a GeoJSON [`Feature`](#feature). +Multiple fiboa Features should be provided as a GeoJSON [`FeatureCollection`](#featurecollection). +Other GeoJSON types are not allowed. -- **[Examples](examples/):** - 1. [as a FeatureCollection](examples/featurecollection/features.json) - 2. [as individual Features with a dedicated Collection](examples/individual-features/) -- **[Datatype mapping](datatypes.md)** +Related documents: -## FeatureCollection - -A FeatureCollection may have a top-level property named `fiboa` to contain all collection-level data. -If present, it contains all properties that are common across the features -and the features shall not contain those properties. -Validation must ensure that the collection-level properties are taken into account. -All features in a FeatureCollection must be fiboa-compliant. 
- -The following properties can't be collection-level properties: - -- `id` -- `geometry` -- `bbox` +- [Examples](examples/) +- [Datatype mapping](datatypes.md) ## Feature -Each [fiboa Feature](../core/README.md#features) must be a valid +- Example: [individual features](examples/individual-features/) + +Each [fiboa Feature](../core/README.md) must be a valid [GeoJSON Feature](https://datatracker.ietf.org/doc/html/rfc7946#section-3.2). The following properties are defined for a GeoJSON Feature (at the top-level of the object): -| Property Name | Data Type | Description | -| ------------- | ------------------- | ------------------------------------------------------------ | -| id | string | **REQUIRED.** See [id](../core/README.md#general-properties) in the core specification, must not be a `number` | -| type | string | **REQUIRED.** The GeoJSON type, must be: `Feature` | +| Property Name | Data Type | Description | +| ------------- | ------------------- | ----------- | +| id | string | **REQUIRED.** See [id](../core/README.md#id) in the core specification, must not be a `number` | +| type | string | **REQUIRED.** The GeoJSON type, must be: `Feature` | | geometry | object | **REQUIRED.** A [GeoJSON Geometry Object](https://datatracker.ietf.org/doc/html/rfc7946#section-3.1), must not be `null` | | bbox | array\ | A [GeoJSON Bounding Box](https://datatracker.ietf.org/doc/html/rfc7946#section-5) | | properties | object | An object with all additional properties (see [`properties`](#properties)) | -| links | array\ | A list of links (see [`links`](#links)) | + +The mapping between the GeoJSON data types and the fiboa data types can be found in the +[data type mapping](datatypes.md). > [!IMPORTANT] > RFC 7946 doesn't support a property named `crs`, which was only available in an earlier version of GeoJSON (2008). 
> The CRS of the GeoJSON geometry and bbox must be WGS 84 / OGC CRS 84, -> see the [RFC 7946, chapter 4](https://datatracker.ietf.org/doc/html/rfc7946#section-4) for details. +> see the [RFC 7946, chapter 4](https://datatracker.ietf.org/doc/html/rfc7946#section-4) for details. -### `properties` +[Collection-level](../core/README.md#collection) data is not supported. +All properties are provided in the JSON object with the key [`properties`](#properties). + +### properties Must include any property that is required by the fiboa core specification. May include any additional property. All properties defined by the core specification (except for `id`, `geometry` and `bbox`) or extensions should be provided here. -### `links` +## FeatureCollection + +- Example: [a feature collection](examples/featurecollection/features.json) + +All features in a GeoJSON FeatureCollection must be fiboa-compliant. + +Properties can also be stored at the [collection-level](../core/README.md#collection) +if all values for a specific property have the same value in all features. +This de-duplicates data for more efficient resource usage. +All properties are stored on the top-level of the FeatureCollection object as +[foreign members](https://datatracker.ietf.org/doc/html/rfc7946#section-6.1). +The individual features shall not contain any properties that are stored at the collection-level. +Validation must ensure that the collection-level properties are taken into account. + +The following properties in Features can't be collection-level properties: -An array of links where each link conforms to the -[Hyperlink Schema](http://schemas.opengis.net/ogcapi/common/part1/1.0/openapi/schemas/link.yaml) -defined in -[OGC API - Common - Part 1](https://docs.ogc.org/is/19-072/19-072.html#_11b9b4f7-42fc-413a-b63a-e7fb060b5e4b). 
+- `id` +- `geometry` +- `bbox` -The following relation types are commonly used: +Properties with the following names can't be moved to the collection-level due to conflicts with the +FeatureCollection properties defined by GeoJSON: -- `self`: Absolute link to the GeoJSON file itself. +- `features` +- `type` diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index d3b7fd7..745b4ff 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -21,65 +21,27 @@ "determination_datetime": "2005-02-28T00:00:00Z", "crop:code": "A", "crop:name": "Ackerland", - "area": 1.631100058555603 + "area": 1.6311 }, "geometry": { "type": "Polygon", "coordinates": [ [ - [ - 7.875243329949302, - 51.7469574917968 - ], - [ - 7.8754156210171224, - 51.74865579902567 - ], - [ - 7.87559517961007, - 51.748657516128716 - ], - [ - 7.875727139469757, - 51.74864762337336 - ], - [ - 7.875865723118926, - 51.74861179149097 - ], - [ - 7.876160946694515, - 51.74853656922356 - ], - [ - 7.876274940061089, - 51.748526513043004 - ], - [ - 7.876646213349393, - 51.74852263605798 - ], - [ - 7.876669177898854, - 51.74759587524452 - ], - [ - 7.876683221091441, - 51.7470291214554 - ], - [ - 7.875243329949302, - 51.7469574917968 - ] + [7.8752433, 51.7469574], + [7.8754156, 51.7486557], + [7.8755951, 51.7486575], + [7.8757271, 51.7486476], + [7.8758657, 51.7486117], + [7.8761609, 51.7485365], + [7.8762749, 51.7485265], + [7.8766462, 51.7485226], + [7.8766691, 51.7475958], + [7.8766832, 51.7470291], + [7.8752433, 51.7469574] ] ] }, - "bbox": [ - 7.875243329949302, - 51.7469574917968, - 7.876683221091441, - 51.748657516128716 - ] + "bbox": [7.8752433, 51.7469574, 7.8766832, 51.7486575] }, { "id": "2713", @@ -90,77 +52,30 @@ "determination_datetime": "2005-02-22T00:00:00Z", "crop:code": "W", "crop:name": "Weide", - "area": 1.8975000381469727 + "area": 1.8975 }, "geometry": { "type": "Polygon", 
"coordinates": [ [ - [ - 9.279072225112648, - 51.925508828714925 - ], - [ - 9.279848170539884, - 51.92582918268683 - ], - [ - 9.280173032315249, - 51.925963048968214 - ], - [ - 9.280599939130775, - 51.92614034991495 - ], - [ - 9.280660193987938, - 51.926028714865886 - ], - [ - 9.280886077078973, - 51.9256102896548 - ], - [ - 9.281335286046785, - 51.924778127406576 - ], - [ - 9.281305739341624, - 51.92472580957354 - ], - [ - 9.280917027691007, - 51.92458295033388 - ], - [ - 9.279903540966059, - 51.92421337118715 - ], - [ - 9.279817610187122, - 51.92423316888092 - ], - [ - 9.279398358118248, - 51.92501015234708 - ], - [ - 9.279241344298002, - 51.925301083950984 - ], - [ - 9.279072225112648, - 51.925508828714925 - ] + [9.2790722, 51.9255088], + [9.2798481, 51.9258291], + [9.280173, 51.925963], + [9.2805999, 51.9261403], + [9.2806601, 51.9260287], + [9.280886, 51.9256102], + [9.2813352, 51.9247781], + [9.2813057, 51.9247258], + [9.280917, 51.9245829], + [9.2799035, 51.9242133], + [9.2798176, 51.9242331], + [9.2793983, 51.9250101], + [9.2792413, 51.925301], + [9.2790722, 51.9255088] ] ] }, - "bbox": [ - 9.279072225112648, - 51.92421337118715, - 9.281335286046785, - 51.92614034991495 - ] + "bbox": [9.2790722, 51.9242133, 9.2813352, 51.9261403] } ] -} \ No newline at end of file +} diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index fd9951d..86a6d82 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -10,57 +10,24 @@ "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A", "nutz_txt": "Ackerland", - "area": 1.631100058555603 + "area": 1.6311 }, "geometry": { "type": "Polygon", "coordinates": [ [ - [ - 7.875243329949302, - 51.7469574917968 - ], - [ - 7.8754156210171224, - 51.74865579902567 - ], - [ - 7.87559517961007, - 51.748657516128716 - ], - [ - 7.875727139469757, - 51.74864762337336 - ], - [ - 7.875865723118926, - 
51.74861179149097 - ], - [ - 7.876160946694515, - 51.74853656922356 - ], - [ - 7.876274940061089, - 51.748526513043004 - ], - [ - 7.876646213349393, - 51.74852263605798 - ], - [ - 7.876669177898854, - 51.74759587524452 - ], - [ - 7.876683221091441, - 51.7470291214554 - ], - [ - 7.875243329949302, - 51.7469574917968 - ] + [7.8752433, 51.7469574], + [7.8754156, 51.7486557], + [7.8755951, 51.7486575], + [7.8757271, 51.7486476], + [7.8758657, 51.7486117], + [7.8761609, 51.7485365], + [7.8762749, 51.7485265], + [7.8766462, 51.7485226], + [7.8766691, 51.7475958], + [7.8766832, 51.7470291], + [7.8752433, 51.7469574] ] ] } -} \ No newline at end of file +} diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index 329e3ed..7014874 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -10,69 +10,30 @@ "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A", "nutz_txt": "Ackerland", - "area": 1.8975000381469727 + "area": 1.8975000 }, "geometry": { "type": "Polygon", "coordinates": [ [ - [ - 9.279072225112648, - 51.925508828714925 - ], - [ - 9.279848170539884, - 51.92582918268683 - ], - [ - 9.280173032315249, - 51.925963048968214 - ], - [ - 9.280599939130775, - 51.92614034991495 - ], - [ - 9.280660193987938, - 51.926028714865886 - ], - [ - 9.280886077078973, - 51.9256102896548 - ], - [ - 9.281335286046785, - 51.924778127406576 - ], - [ - 9.281305739341624, - 51.92472580957354 - ], - [ - 9.280917027691007, - 51.92458295033388 - ], - [ - 9.279903540966059, - 51.92421337118715 - ], - [ - 9.279817610187122, - 51.92423316888092 - ], - [ - 9.279398358118248, - 51.92501015234708 - ], - [ - 9.279241344298002, - 51.925301083950984 - ], - [ - 9.279072225112648, - 51.925508828714925 - ] + [9.2790722, 51.9255088], + [9.2798481, 51.9258291], + [9.2801730, 51.9259630], + [9.2805999, 51.9261403], + [9.2806601, 51.9260287], + [9.2808860, 51.9256102], + [9.2813352, 
51.9247781], + [9.2813057, 51.9247258], + [9.2809170, 51.9245829], + [9.2799035, 51.9242133], + [9.2798176, 51.9242331], + [9.2793983, 51.9250101], + [9.2792413, 51.9253010], + [9.2790722, 51.9255088] ] ] - } -} \ No newline at end of file + }, + "bbox": [ + 9.2790722, 51.9242133, 9.2813352, 51.9261403 + ] +} diff --git a/geoparquet/README.md b/geoparquet/README.md index 55c75f1..4cfdc36 100644 --- a/geoparquet/README.md +++ b/geoparquet/README.md @@ -6,20 +6,7 @@ either version [v1.0.0](https://geoparquet.org/releases/v1.0.0/) or [v1.1.0](https://geoparquet.org/releases/v1.1.0/). We aim to support any future version of GeoParquet, too. -> [!NOTE] -> The GeoParquet encoding is still work in progress. Feedback is welcome! - -- **[Examples](examples/)** -- **[Data type mapping](datatypes.md)** - -## Collection - -The GeoParquet file must embed the collection-level metadata -in the Parquet metadata in a property named `fiboa`. - -## Features - -Each [fiboa Feature](../core/README.md#features) corresponds to a row in a GeoParquet file. +Each [fiboa Feature](../core/README.md) corresponds to a row in a GeoParquet file. The properties defined for fiboa Features are made available as individual columns in the GeoParquet file. @@ -27,9 +14,19 @@ Properties that are optional can be omitted if all values are [null values](https://parquet.apache.org/docs/file-format/nulls/), i.e. the column can be missing from the GeoParquet file. +Properties can also be stored at the [collection-level](../core/README.md#collection) if all values in a column have the same value. +This de-duplicates data for more efficient resource usage and simplifies the structure of the Parquet file. +The GeoParquet file must embed the properties in the Parquet metadata in a property named `fiboa`. +The metadata must be JSON-encoded. + The mapping between the Parquet data types and the fiboa data types, can be found in the [data type mapping](datatypes.md). 
+Related documents: + +- [Examples](examples/) +- [Data type mapping](datatypes.md) + ## Best practices For data with a lot of repetition, brotli compression is recommended. From 7a7f1dcb853580f04256f6cd46a858b1ac7e4778 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 12 Mar 2025 14:02:24 +0100 Subject: [PATCH 09/15] Clarify the use of Structs vs Maps #34 --- CHANGELOG.md | 9 +++++---- geoparquet/datatypes.md | 10 +++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fadecb4..34309fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,11 +30,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed - Various minor clarifications and editorial enhancements -- GeoParquet encoding: Properties that are optional can be omitted if all values are null values -- GeoJSON encoding: Clarify the encoding of the top-level properties (including `links` and `fiboa`) -- GeoJSON encoding: Clarify the use of RFC 7946 -- GeoParquet encoding for bounding boxes and objects - Added descriptions to the allowed values for `determination_method` +- GeoJSON: Clarify the encoding of the top-level properties (including `links` and `fiboa`) +- GeoJSON: Clarify the use of RFC 7946 +- GeoParquet: Properties that are optional can be omitted if all values are null values +- GeoParquet: Added encoding for bounding boxes and objects +- GeoParquet: Clarified the use of Map and Struct data types ## [v0.2.0] - 2024-04-10 diff --git a/geoparquet/datatypes.md b/geoparquet/datatypes.md index f999451..51dd46d 100644 --- a/geoparquet/datatypes.md +++ b/geoparquet/datatypes.md @@ -19,7 +19,7 @@ It also shows the mapping to the GeoParquet data types. | binary | BYTE_ARRAY | as string, base64-encoded | | string
charset: UTF-8 | STRING (BYTE_ARRAY) | yes | | array | LIST | yes | -| object
keys: string
values: any | STRUCT / MAP | yes | +| object
keys: string
values: any | STRUCT or MAP (see below) | yes | | date | DATE (INT32) | as string, compliant to ISO8601 | | date-time
with milliseconds
timezone: UTC | TimestampType (INT64)
isAdjustedToUTC: true
unit: MILLIS
(deprecated: TIMESTAMP_MILLIS) | as string, compliant to ISO8601 | | geometry | BYTE_ARRAY
encoded as WKB | no | @@ -29,6 +29,14 @@ It also shows the mapping to the GeoParquet data types. The integer data types and the data type string can also be mapped to the ENUM data type in Parquet if a pre-defined set of values is available. +## Struct vs Map + +Parquet has both Map and Struct types. The struct type is similar to a named dictionary while the map type is similar to a list of ordered (key, value) pairs. The main difference is that you need to know up-front the keys for the struct type, while you don't for the map type. + +Due to this difference, the **Struct** type can only be used if `additionalProperties` is `false` (the default value) and only `properties` is provided to clearly specify the exact names of the properties. + +Any variability in the keys through the use of `additionalProperties` (except for the default `false`) or `patternProperties` requires the use of the **Map** data type. Please note that the order of the Map type is guaranteed to be preserved. + ## Unsupported Data Types The following data types occur in Parquet, but are not currently supported in fiboa: From 12dd5907899c7a13e387cae58a4b57dd3be6e03e Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 12 Mar 2025 14:15:00 +0100 Subject: [PATCH 10/15] Clarify the relation of other standards and initiatives #35 --- README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8c2e1e8..08073e3 100644 --- a/README.md +++ b/README.md @@ -29,15 +29,30 @@ The specification in this repository consists of three parts: - [GeoJSON Encoding](geojson/README.md) - [GeoParquet Encoding](geoparquet/README.md) -To completent the specification, there are also best practices and extensions available: +To complement the specification, there are also best practices and extensions available: - [Best Practices](best-practices/README.md) - [Extensions](https://github.com/fiboa/extensions/) -The repository also contains additional 
information about the project: +## Relation to other standards and working groups -- [Changelog](CHANGELOG.md) -- [Citation Details (as CFF file)](CITATION.cff) +fiboa doesn't aim to reinvent the wheel. +Our aim is to align with existing efforts as much as possible. +Some parts of the specification are already based on the work of other initiatives, +e.g. the determination-related fields in the core specification. + +Related standards and working groups are: + +- [Adapt standard](https://adaptstandard.org), including their [WG17](https://github.com/ADAPT/Standard/issues/97) +- [Varda FieldID](https://www.varda.ag/global-field-id) +- [Deere Boundaries](https://developer.deere.com/dev-docs/boundaries) +- [AgGateway](https://aggateway.org/), including their + [Locking in on Field Boundaries](https://aggateway.org/Portals/1010/WebSite/About%20Us/FIELD%20BOUNDARY%20FLYER%20122123.pdf?ver=2024-01-03-212959-590) initiative + +If you think we are missing relevant work here, we'd love to hear from you. +Please get in touch by [opening an issue](https://github.com/fiboa/specification/issues/new)! + +## Contributing The fiboa community strives to provide a welcoming and transparent environment for all of the project’s participants. You can find additional information about our community best practices and collaborative development processes below: From 5d12142b97994f01750fbf9da69cf7bf42ed46ac Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Wed, 12 Mar 2025 16:00:42 +0100 Subject: [PATCH 11/15] GeoParquet: Renamed Parquet metadata key from `fiboa` to `collection` --- CHANGELOG.md | 1 + .../examples/featurecollection/features.json | 20 +++++++++---------- geoparquet/README.md | 2 +- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34309fc..d12623e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- Switched from v0.1.0 to v0.2.0 of the schema language - Renamed `fiboa_extensions` to `schemas` - Schemas must be valid HTTP(S) URLs +- GeoParquet: Renamed Parquet metadata key from `fiboa` to `collection` - GeoJSON: Switched `contentEncoding` for data type `binary` from `binary` to `base64` - GeoJSON FeatureCollection: Collection-level data is provided at the top-level, not in a `fiboa` property diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index 745b4ff..c51460f 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,15 +1,13 @@ { - "fiboa": { - "schemas": [ - "https://fiboa.github.io/specification/v0.2.0/schema.yaml", - "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", - "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", - "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" - ], - "collection": "de_nrw", - "license": "dl-de/by-2-0", - "attribution": "Land Nordrhein-Westfalen / Open.NRW - https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/" - }, + "schemas": [ + "https://fiboa.github.io/specification/v0.2.0/schema.yaml", + "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", + "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" + ], + "collection": "de_nrw", + "license": "dl-de/by-2-0", + "attribution": "Land Nordrhein-Westfalen / Open.NRW - https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/", "type": "FeatureCollection", "features": [ { diff --git a/geoparquet/README.md b/geoparquet/README.md index 4cfdc36..3af2b42 100644 --- a/geoparquet/README.md +++ b/geoparquet/README.md @@ -16,7 +16,7 @@ i.e. the column can be missing from the GeoParquet file. 
Properties can also be stored at the [collection-level](../core/README.md#collection) if all values in a column have the same value. This de-duplicates data for more efficient resource usage and simplifies the structure of the Parquet file. -The GeoParquet file must embed the properties in the Parquet metadata in a property named `fiboa`. +The GeoParquet file must embed the properties in the Parquet metadata in a property named `collection`. The metadata must be JSON-encoded. The mapping between the Parquet data types and the fiboa data types, can be found in the From 304b9b0325fb4dd708898e5d29024e9d3bbb5ffc Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 13 Mar 2025 01:53:10 +0100 Subject: [PATCH 12/15] Omit nulled GeoJSON properties --- CHANGELOG.md | 1 + geojson/datatypes.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d12623e..13cad05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Schemas must be valid HTTP(S) URLs - GeoParquet: Renamed Parquet metadata key from `fiboa` to `collection` - GeoJSON: Switched `contentEncoding` for data type `binary` from `binary` to `base64` +- GeoJSON data types: `null` is not allowed any longer, instead omit the property - GeoJSON FeatureCollection: Collection-level data is provided at the top-level, not in a `fiboa` property ### Removed diff --git a/geojson/datatypes.md b/geojson/datatypes.md index 661183e..70e167e 100644 --- a/geojson/datatypes.md +++ b/geojson/datatypes.md @@ -24,7 +24,7 @@ It also shows the mapping to the GeoJSON data types. | date-time
with milliseconds
timezone: UTC | string
format: date-time
pattern: Z$ | yes | | geometry | [object with schema](https://geojson.org/schema/Geometry.json) | no | | bounding-box
x and y only, no z | array
minItems: 4
maxItems: 4
items: number | no | -| *if a property is not required* | null | yes | +| *if a property is not required* | omit the JSON property (`null` is not allowed) | yes | ## Potential issues in conversion From 30933a2052ed5552d112df079cbaf595199109e0 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 13 Mar 2025 02:01:30 +0100 Subject: [PATCH 13/15] Clarify handling of missing values --- CHANGELOG.md | 1 + geojson/datatypes.md | 11 ++++++++--- geoparquet/datatypes.md | 7 ++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13cad05..77a7c73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Various minor clarifications and editorial enhancements - Added descriptions to the allowed values for `determination_method` +- Clarified handling of missing values - GeoJSON: Clarify the encoding of the top-level properties (including `links` and `fiboa`) - GeoJSON: Clarify the use of RFC 7946 - GeoParquet: Properties that are optional can be omitted if all values are null values diff --git a/geojson/datatypes.md b/geojson/datatypes.md index 70e167e..c0bece5 100644 --- a/geojson/datatypes.md +++ b/geojson/datatypes.md @@ -12,7 +12,7 @@ It also shows the mapping to the GeoJSON data types. | uint16 | integer
minimum: 0
maximum: 65535 | yes | | int32 | integer
minimum: -2147483648
maximum: 2147483647 | yes | | uint32 | integer
minimum: 0
maximum: 4294967295 | yes | -| int64 | integer
minimum: -9223372036854775808
maximum: 9223372036854775807 | yes | +| int64 | integer
minimum: -9223372036854775808
maximum: 9223372036854775807 | yes | | uint64 | integer
minimum: 0
maximum: 18446744073709551615 | yes | | float
IEEE 32-bit | number
minimum: ?
maximum: ? | yes | | double
IEEE 64-bit | number
minimum: ?
maximum: ? | yes | @@ -22,9 +22,14 @@ It also shows the mapping to the GeoJSON data types. | object
keys: string
values: any | object
additionalProperties: false | yes | | date | string
format: date | yes | | date-time
with milliseconds
timezone: UTC | string
format: date-time
pattern: Z$ | yes | -| geometry | [object with schema](https://geojson.org/schema/Geometry.json) | no | +| geometry | [object with schema](https://geojson.org/schema/Geometry.json) | no | | bounding-box
x and y only, no z | array
minItems: 4
maxItems: 4
items: number | no | -| *if a property is not required* | omit the JSON property (`null` is not allowed) | yes | + +## Missing values + +For optional properties, values might be missing. +This is expressed by omitting the JSON property. +The value `null` is not allowed. ## Potential issues in conversion diff --git a/geoparquet/datatypes.md b/geoparquet/datatypes.md index 51dd46d..734f640 100644 --- a/geoparquet/datatypes.md +++ b/geoparquet/datatypes.md @@ -24,11 +24,16 @@ It also shows the mapping to the GeoParquet data types. | date-time
with milliseconds
timezone: UTC | TimestampType (INT64)
isAdjustedToUTC: true
unit: MILLIS
(deprecated: TIMESTAMP_MILLIS) | as string, compliant to ISO8601 | | geometry | BYTE_ARRAY
encoded as WKB | no | | bounding-box
x and y only, no z | STRUCT(xmin FLOAT, ymin FLOAT, xmax FLOAT, ymax FLOAT) | no | -| *if a property is not required* | [Nullity](https://parquet.apache.org/docs/file-format/nulls/) | yes | The integer data types and the data type string can also be mapped to the ENUM data type in Parquet if a pre-defined set of values is available. +## Missing values + +For optional properties, values might be missing. +This is expressed by providing the values `null` +(see data type [Nullity](https://parquet.apache.org/docs/file-format/nulls/)). + ## Struct vs Map Parquet has both Map and Struct types. The struct type is similar to a named dictionary while the map type is similar to a list of ordered (key, value) pairs. The main difference is that you need to know up-front the keys for the struct type, while you don't for the map type. From 9492edd45093f387c195ec51457e86fc72721d9c Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 13 Mar 2025 02:26:44 +0100 Subject: [PATCH 14/15] Update version numbers --- CITATION.cff | 2 +- README.md | 2 +- core/README.md | 2 +- core/schema/schema.yaml | 4 ++-- geojson/examples/featurecollection/features.json | 2 +- geojson/examples/individual-features/12324.json | 2 +- geojson/examples/individual-features/2713.json | 2 +- geojson/schema/datatypes.json | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 5685691..b2fe93f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,7 +8,7 @@ preferred-citation: type: standard title: "Field Boundaries for Agriculture (fiboa) specification" abstract: "Making field boundaries openly available in a unified way." 
- version: 0.2.0 + version: 0.3.0 year: 2024 date-released: 2024-04-10 license: Apache-2.0 diff --git a/README.md b/README.md index 08073e3..546f6f3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository contains the core specification for fiboa, including the data sc For more context, information on the ecosystem, and points of contact see the [fiboa github organization](https://github.com/fiboa/). -- Version: **0.2.0** +- Version: **0.3.0** > [!IMPORTANT] > The fiboa specification is a work in progress. diff --git a/core/README.md b/core/README.md index 3605deb..8b3d45a 100644 --- a/core/README.md +++ b/core/README.md @@ -7,7 +7,7 @@ common definitions are shared across these levels. - A Collection refers to a group of one or more features. - A Feature is a single field geometry with additional properties. -- **Schema:** <https://fiboa.github.io/specification/v0.2.0/schema.yaml> +- **Schema:** <https://fiboa.github.io/specification/v0.3.0/schema.yaml> ## Table of Contents diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index 8886fc2..f86945e 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -1,4 +1,4 @@ -$schema: https://fiboa.github.io/schema/v0.2.0/schema.json +$schema: https://fiboa.github.io/schema/v0.3.0/schema.json required: - schemas - id @@ -13,7 +13,7 @@ properties: contains: type: string enum: - - https://fiboa.github.io/specification/v0.2.0/schema.yaml + - https://fiboa.github.io/specification/v0.3.0/schema.yaml id: type: string minLength: 1 diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index c51460f..5402fa8 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,6 +1,6 @@ { "schemas": [ - "https://fiboa.github.io/specification/v0.2.0/schema.yaml", + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", 
"https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", 
"https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index 86a6d82..0f741b1 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -3,7 +3,7 @@ "type": "Feature", "properties": { "schemas": [ - "https://fiboa.github.io/specification/v0.2.0/schema.yaml", + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], "flik": "DENWLI0542130247", diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index 7014874..b5c4259 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -3,7 +3,7 @@ "type": "Feature", "properties": { "schemas": [ - "https://fiboa.github.io/specification/v0.2.0/schema.yaml", + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" ], "flik": "DENWLI0540210084", diff --git a/geojson/schema/datatypes.json b/geojson/schema/datatypes.json index 3c92bb7..ab49171 100644 --- a/geojson/schema/datatypes.json +++ b/geojson/schema/datatypes.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://fiboa.github.io/specification/v0.2.0/geojson/datatypes.json", + "$id": "https://fiboa.github.io/specification/v0.3.0/geojson/datatypes.json", "$defs": { "boolean": { "type": "boolean" From c10e00aed64482687c5a3debd2c54ca924e2a9a8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 20 Mar 2025 11:20:15 -0500 Subject: [PATCH 15/15] Update to recent discussions --- core/README.md | 37 ++++++++++++------- core/schema/schema.yaml | 6 +++ .../examples/featurecollection/features.json | 14 ++++--- .../examples/individual-features/12324.json | 11 ++++-- .../examples/individual-features/2713.json | 11 ++++-- 5 
files changed, 52 insertions(+), 27 deletions(-) diff --git a/core/README.md b/core/README.md index 8b3d45a..defddf0 100644 --- a/core/README.md +++ b/core/README.md @@ -25,12 +25,12 @@ common definitions are shared across these levels. ## General Properties -| Property Name | Data Type | Description | -| ------------- | -------------- | ----------- | -| schemas | array\<string> | **REQUIRED.** A list of schemas the collection implements. | -| id | string | **REQUIRED.** An identifier for the field. | -| collection | string | The identifier of the parent collection. | -| category | array\<string> | A set of categories the field boundary belongs to. | +| Property Name | Data Type | Description | +| ------------- | ------------------------------- | ----------- | +| schemas | object\<string, array\<string>> | **REQUIRED.** A list of schemas the collection implements. | +| id | string | **REQUIRED.** An identifier for the field. | +| collection | string | **REQUIRED.** The identifier of the collection. | +| category | array\<string> | A set of categories the field boundary belongs to. | ### schemas @@ -39,8 +39,25 @@ Each schema must be a valid HTTP(S) URLs to an existing YAML files compliant to The schema for this specification (see above) is required to be provided. Each `collection` must have a single set of applicable schemas. +The key of the dictionary must be equal to the value provided for the `collection` property. -The schema URI listed above is required to be present in the `schemas` array. +The schema URI for fiboa that is listed above is required to be present. + +**Example for `schemas`:** + +This describes two collections `abc` and `xyz`. 
+ +```json +{ + "abc": [ + "https://fiboa.github.io/specification/v0.3.0/schema.yaml" + ], + "xyz": [ + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", + "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" + ] +} +``` ### id @@ -50,12 +67,6 @@ It must be unique per collection, i.e. 
`collection` and `id` form a unique ident A collection is a group of one or more features with a unique identifier, stored in the `collection` property. -The collection identifier is usually only needed for merged datasets and it is **required** in this case. -Implementations may create collection identifiers if datasets that don't provide a collection identifer are getting merged. -A validatior can't know whether the `collection` property is required, the data providers or tooling must handle this, -i.e. if data from two different sources is merged, a `collection` property with distinct values must be provided. -This ensures unique IDs through the combination of the properties `id` and `collection`. - Encodings may support to store properties that consists of the same value across all features at the collection-level. This de-duplicates data for more efficient resource usage, but only applies if more than two features are available for the collection. The specific location and behaviour of collection-level data is specified in the encoding-specific specifications. 
diff --git a/core/schema/schema.yaml b/core/schema/schema.yaml index f86945e..85522f8 100644 --- a/core/schema/schema.yaml +++ b/core/schema/schema.yaml @@ -2,7 +2,13 @@ $schema: https://fiboa.github.io/schema/v0.3.0/schema.json required: - schemas - id + - collection - geometry +collection: + schemas: true + id: false + geometry: false + bbox: false properties: schemas: type: array diff --git a/geojson/examples/featurecollection/features.json b/geojson/examples/featurecollection/features.json index 5402fa8..d31c1af 100644 --- a/geojson/examples/featurecollection/features.json +++ b/geojson/examples/featurecollection/features.json @@ -1,10 +1,12 @@ { - "schemas": [ - "https://fiboa.github.io/specification/v0.3.0/schema.yaml", - "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", - "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", - "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" - ], + "schemas": { + "de_nrw": [ + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", + "https://fiboa.github.io/inspire-extension/v0.2.0/schema.yaml", + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml", + "https://fiboa.github.io/crop-extension/v0.1.0/schema.yaml" + ] + }, "collection": "de_nrw", "license": "dl-de/by-2-0", "attribution": "Land Nordrhein-Westfalen / Open.NRW - https://www.opengeodata.nrw.de/produkte/umwelt_klima/bodennutzung/landwirtschaft/", diff --git a/geojson/examples/individual-features/12324.json b/geojson/examples/individual-features/12324.json index 0f741b1..2f8b87d 100644 --- a/geojson/examples/individual-features/12324.json +++ b/geojson/examples/individual-features/12324.json @@ -2,10 +2,13 @@ "id": "12324", "type": "Feature", "properties": { - "schemas": [ - "https://fiboa.github.io/specification/v0.3.0/schema.yaml", - "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" - ], + "schemas": { + "example": [ + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", + 
"https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + ] + }, + "collection": "example", "flik": "DENWLI0542130247", "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A", diff --git a/geojson/examples/individual-features/2713.json b/geojson/examples/individual-features/2713.json index b5c4259..d53f07a 100644 --- a/geojson/examples/individual-features/2713.json +++ b/geojson/examples/individual-features/2713.json @@ -2,10 +2,13 @@ "id": "2713", "type": "Feature", "properties": { - "schemas": [ - "https://fiboa.github.io/specification/v0.3.0/schema.yaml", - "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" - ], + "schemas": { + "example": [ + "https://fiboa.github.io/specification/v0.3.0/schema.yaml", + "https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml" + ] + }, + "collection": "example", "flik": "DENWLI0540210084", "determination_datetime": "2005-02-28T00:00:00Z", "nutz_code": "A",