From fca695bf0a6887f6cd7455468ef56fcfb963e82e Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Mon, 19 May 2025 22:15:14 +0200 Subject: [PATCH 1/5] add datetime validation for collection time intervals --- CHANGELOG.md | 2 + CONTRIBUTING.md | 26 ++++++++++++ stac_pydantic/collection.py | 4 +- .../v1.0.0/example-collection-list.json | 4 +- tests/api/test_collections.py | 2 +- tests/conftest.py | 2 +- tests/test_models.py | 40 +++++++++++++++++-- 7 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 98a2386..0d1d34c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## Unreleased +- Add datetime validation for collection's time intervals (Must follow [`RFC 3339, section 5.6.`](https://datatracker.ietf.org/doc/html/rfc3339#section-5.6)) + ## 3.2.0 (2025-03-20) - Move `validate_bbox` and `validate_datetime` field validation functions outside the Search class (to enable re-utilization) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7017d87 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,26 @@ +# Contributing + +Issues and pull requests are more than welcome. + +**dev install** + +```bash +git clone https://github.com/stac-utils/stac-pydantic.git +cd stac-pydantic +python -m pip install -e ".[dev]" +``` + +You can then run the tests with the following command: + +```sh +python -m pytest --cov stac_pydantic --cov-report term-missing +``` + + +**pre-commit** + +This repo is set to use `pre-commit` to run *ruff*, *pydocstring* and mypy when committing new code. 
+ +```bash +pre-commit install +``` diff --git a/stac_pydantic/collection.py b/stac_pydantic/collection.py index 4b4cae8..a0ae411 100644 --- a/stac_pydantic/collection.py +++ b/stac_pydantic/collection.py @@ -3,7 +3,7 @@ from pydantic import Field from stac_pydantic.catalog import _Catalog -from stac_pydantic.shared import Asset, NumType, Provider, StacBaseModel +from stac_pydantic.shared import Asset, NumType, Provider, StacBaseModel, UtcDatetime class SpatialExtent(StacBaseModel): @@ -19,7 +19,7 @@ class TimeInterval(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#temporal-extent-object """ - interval: List[List[Union[str, None]]] + interval: List[List[Union[UtcDatetime, None]]] class Extent(StacBaseModel): diff --git a/tests/api/examples/v1.0.0/example-collection-list.json b/tests/api/examples/v1.0.0/example-collection-list.json index 5a8f6f6..3040b03 100644 --- a/tests/api/examples/v1.0.0/example-collection-list.json +++ b/tests/api/examples/v1.0.0/example-collection-list.json @@ -63,8 +63,8 @@ "temporal":{ "interval":[ [ - "2000-03-04T12:00:00.000000Z", - "2006-12-31T12:00:00.000000Z" + "2000-03-04T12:00:00Z", + "2006-12-31T12:00:00Z" ] ] } diff --git a/tests/api/test_collections.py b/tests/api/test_collections.py index 0cf7293..18d7dd8 100644 --- a/tests/api/test_collections.py +++ b/tests/api/test_collections.py @@ -9,5 +9,5 @@ def test_collection_list(): test_collection_list = request(EXAMPLE_COLLECTION_LIST, PATH) - valid_collection_list = Collections(**test_collection_list).model_dump() + valid_collection_list = Collections(**test_collection_list).model_dump(mode="json") dict_match(test_collection_list, valid_collection_list) diff --git a/tests/conftest.py b/tests/conftest.py index 9152816..e83dc5b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,7 +48,7 @@ def dict_match(d1: dict, d2: dict): # to compare the values as datetime objects. 
elif "datetime" in diff[1]: dates = [ - UtcDatetimeAdapter.validate_strings(date) + UtcDatetimeAdapter.validate_strings(date, strict=True) if isinstance(date, str) else date for date in diff[2] diff --git a/tests/test_models.py b/tests/test_models.py index 6e10a2f..2c497dd 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -6,6 +6,7 @@ from shapely.geometry import shape from stac_pydantic import Collection, Item, ItemProperties +from stac_pydantic.collection import TimeInterval from stac_pydantic.extensions import _fetch_and_cache_schema, validate_extensions from stac_pydantic.links import Link, Links from stac_pydantic.shared import MimeTypes, StacCommonMetadata @@ -81,13 +82,13 @@ def test_version_extension_item() -> None: def test_version_extension_collection() -> None: test_coll = request(VERSION_EXTENSION_COLLECTION) - valid_coll = Collection(**test_coll).model_dump() + valid_coll = Collection(**test_coll).model_dump(mode="json") dict_match(test_coll, valid_coll) def test_item_assets_extension() -> None: test_coll = request(ITEM_ASSET_EXTENSION) - valid_coll = Collection(**test_coll).model_dump() + valid_coll = Collection(**test_coll).model_dump(mode="json") dict_match(test_coll, valid_coll) @@ -139,7 +140,9 @@ def test_extension_validation_schema_cache() -> None: def test_to_json(infile, model): test_item = request(infile) validated = model(**test_item) - dict_match(json.loads(validated.model_dump_json()), validated.model_dump()) + dict_match( + json.loads(validated.model_dump_json()), validated.model_dump(mode="json") + ) def test_item_to_json() -> None: @@ -345,3 +348,34 @@ def test_item_bbox_validation() -> None: test_item["bbox"] = None with pytest.raises(ValueError, match="bbox is required if geometry is not null"): Item(**test_item) + + +@pytest.mark.parametrize( + "interval", + [ + [[None, "yo"]], + [["yo", None]], + [["yo", "yo"]], + ], +) +def test_time_intervals_invalid(interval) -> None: + """Check Time Interval model.""" + with 
pytest.raises(ValidationError): + TimeInterval(interval=interval) + + +@pytest.mark.parametrize( + "interval", + [ + [["2024-01-01T00:00:00Z", None]], + [[None, "2024-01-01T00:00:00Z"]], + [["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"]], + [ + ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], + ["2023-01-31T00:00:00Z", "2024-01-01T00:00:00Z"], + ], + ], +) +def test_time_intervals_valid(interval) -> None: + """Check Time Interval model.""" + assert TimeInterval(interval=interval) From 9bf79500540a5d3dafaacacd1a37b930409971da Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 20 May 2025 18:49:56 +0200 Subject: [PATCH 2/5] validate time intervals --- stac_pydantic/api/search.py | 83 ++++--------------------------------- stac_pydantic/collection.py | 46 +++++++++++++++++--- stac_pydantic/shared.py | 76 ++++++++++++++++++++++++++++++++- tests/test_models.py | 11 +++++ 4 files changed, 135 insertions(+), 81 deletions(-) diff --git a/stac_pydantic/api/search.py b/stac_pydantic/api/search.py index f37ae5c..3169ce2 100644 --- a/stac_pydantic/api/search.py +++ b/stac_pydantic/api/search.py @@ -1,5 +1,5 @@ from datetime import datetime as dt -from typing import Any, Dict, List, Optional, Tuple, Union, cast +from typing import Any, Dict, List, Optional, Union from geojson_pydantic.geometries import ( GeometryCollection, @@ -10,13 +10,18 @@ Point, Polygon, ) -from pydantic import AfterValidator, BaseModel, Field, TypeAdapter, model_validator +from pydantic import AfterValidator, BaseModel, Field, model_validator from typing_extensions import Annotated from stac_pydantic.api.extensions.fields import FieldsExtension from stac_pydantic.api.extensions.query import Operator from stac_pydantic.api.extensions.sort import SortExtension -from stac_pydantic.shared import BBox, UtcDatetime +from stac_pydantic.shared import ( + BBox, + str_to_datetimes, + validate_bbox, + validate_datetime, +) Intersection = Union[ Point, @@ -28,78 +33,6 @@ GeometryCollection, ] -SearchDatetime = 
TypeAdapter(Optional[UtcDatetime]) - - -def validate_bbox(v: Optional[BBox]) -> Optional[BBox]: - """Validate BBOX value.""" - if v: - # Validate order - if len(v) == 4: - xmin, ymin, xmax, ymax = cast(Tuple[int, int, int, int], v) - - elif len(v) == 6: - xmin, ymin, min_elev, xmax, ymax, max_elev = cast( - Tuple[int, int, int, int, int, int], v - ) - if max_elev < min_elev: - raise ValueError( - "Maximum elevation must greater than minimum elevation" - ) - else: - raise ValueError("Bounding box must have 4 or 6 coordinates") - - # Validate against WGS84 - if xmin < -180 or ymin < -90 or xmax > 180 or ymax > 90: - raise ValueError("Bounding box must be within (-180, -90, 180, 90)") - - if ymax < ymin: - raise ValueError("Maximum latitude must be greater than minimum latitude") - - return v - - -def str_to_datetimes(value: str) -> List[Optional[dt]]: - # Split on "/" and replace no value or ".." with None - values = [v if v and v != ".." else None for v in value.split("/")] - - # Cast because pylance gets confused by the type adapter and annotated type - dates = cast( - List[Optional[dt]], - [ - # Use the type adapter to validate the datetime strings, strict is necessary - # due to pydantic issues #8736 and #8762 - SearchDatetime.validate_strings(v, strict=True) if v else None - for v in values - ], - ) - return dates - - -def validate_datetime(v: Optional[str]) -> Optional[str]: - """Validate Datetime value.""" - if v is not None: - dates = str_to_datetimes(v) - - # If there are more than 2 dates, it's invalid - if len(dates) > 2: - raise ValueError( - "Invalid datetime range. Too many values. Must match format: {begin_date}/{end_date}" - ) - - # If there is only one date, duplicate to use for both start and end dates - if len(dates) == 1: - dates = [dates[0], dates[0]] - - # If there is a start and end date, check that the start date is before the end date - if dates[0] and dates[1] and dates[0] > dates[1]: - raise ValueError( - "Invalid datetime range. 
Begin date after end date. " - "Must match format: {begin_date}/{end_date}" - ) - - return v - class Search(BaseModel): """ diff --git a/stac_pydantic/collection.py b/stac_pydantic/collection.py index a0ae411..186375a 100644 --- a/stac_pydantic/collection.py +++ b/stac_pydantic/collection.py @@ -1,9 +1,45 @@ -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union -from pydantic import Field +from pydantic import AfterValidator, Field +from typing_extensions import Annotated from stac_pydantic.catalog import _Catalog -from stac_pydantic.shared import Asset, NumType, Provider, StacBaseModel, UtcDatetime +from stac_pydantic.shared import ( + Asset, + BBox, + NumType, + Provider, + StacBaseModel, + UtcDatetime, +) + +TInterval = Tuple[Union[UtcDatetime, None], Union[UtcDatetime, None]] + + +def validate_time_interval(v: List[TInterval]) -> List[TInterval]: + ivalues = iter(v) + + # The first time interval always describes the overall temporal extent of the data. + start, end = next(ivalues) + if start and end: + assert start < end, f"`Start` time {start} older than `End` time {end}" + + # All subsequent time intervals can be used to provide a more precise + # description of the extent and identify clusters of data. 
+ for s, e in ivalues: + if start and s: + if start > s: + raise ValueError( + f"`Overall Start` time {start} later than `Start` time {s}" + ) + + if end and e: + if e > end: + raise ValueError( + f"`End` time {e} later than `Overall End` time {end}" + ) + + return v class SpatialExtent(StacBaseModel): @@ -11,7 +47,7 @@ class SpatialExtent(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#spatial-extent-object """ - bbox: List[List[NumType]] + bbox: List[BBox] class TimeInterval(StacBaseModel): @@ -19,7 +55,7 @@ class TimeInterval(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#temporal-extent-object """ - interval: List[List[Union[UtcDatetime, None]]] + interval: Annotated[List[TInterval], AfterValidator(validate_time_interval)] class Extent(StacBaseModel): diff --git a/stac_pydantic/shared.py b/stac_pydantic/shared.py index ab55a05..df55bac 100644 --- a/stac_pydantic/shared.py +++ b/stac_pydantic/shared.py @@ -1,6 +1,7 @@ +from datetime import datetime as dt from datetime import timezone from enum import Enum, auto -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union, cast from warnings import warn from pydantic import ( @@ -9,6 +10,7 @@ BaseModel, ConfigDict, Field, + TypeAdapter, model_validator, ) from typing_extensions import Annotated, Self @@ -32,6 +34,8 @@ AfterValidator(lambda d: d.astimezone(timezone.utc)), ] +SearchDatetime = TypeAdapter(Optional[UtcDatetime]) + class MimeTypes(str, Enum): """ @@ -196,3 +200,73 @@ class Asset(StacBaseModel): model_config = ConfigDict( populate_by_name=True, use_enum_values=True, extra="allow" ) + + +def str_to_datetimes(value: str) -> List[Optional[dt]]: + # Split on "/" and replace no value or ".." with None + values = [v if v and v != ".." 
else None for v in value.split("/")] + + # Cast because pylance gets confused by the type adapter and annotated type + dates = cast( + List[Optional[dt]], + [ + # Use the type adapter to validate the datetime strings, strict is necessary + # due to pydantic issues #8736 and #8762 + SearchDatetime.validate_strings(v, strict=True) if v else None + for v in values + ], + ) + return dates + + +def validate_datetime(v: Optional[str]) -> Optional[str]: + """Validate Datetime value.""" + if v is not None: + dates = str_to_datetimes(v) + + # If there are more than 2 dates, it's invalid + if len(dates) > 2: + raise ValueError( + "Invalid datetime range. Too many values. Must match format: {begin_date}/{end_date}" + ) + + # If there is only one date, duplicate to use for both start and end dates + if len(dates) == 1: + dates = [dates[0], dates[0]] + + # If there is a start and end date, check that the start date is before the end date + if dates[0] and dates[1] and dates[0] > dates[1]: + raise ValueError( + "Invalid datetime range. Begin date after end date. 
" + "Must match format: {begin_date}/{end_date}" + ) + + return v + + +def validate_bbox(v: Optional[BBox]) -> Optional[BBox]: + """Validate BBOX value.""" + if v: + # Validate order + if len(v) == 4: + xmin, ymin, xmax, ymax = cast(Tuple[int, int, int, int], v) + + elif len(v) == 6: + xmin, ymin, min_elev, xmax, ymax, max_elev = cast( + Tuple[int, int, int, int, int, int], v + ) + if max_elev < min_elev: + raise ValueError( + "Maximum elevation must greater than minimum elevation" + ) + else: + raise ValueError("Bounding box must have 4 or 6 coordinates") + + # Validate against WGS84 + if xmin < -180 or ymin < -90 or xmax > 180 or ymax > 90: + raise ValueError("Bounding box must be within (-180, -90, 180, 90)") + + if ymax < ymin: + raise ValueError("Maximum latitude must be greater than minimum latitude") + + return v diff --git a/tests/test_models.py b/tests/test_models.py index 2c497dd..5b12ad5 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -356,6 +356,17 @@ def test_item_bbox_validation() -> None: [[None, "yo"]], [["yo", None]], [["yo", "yo"]], + [["2024-01-01T00:00:00Z", "2023-01-01T00:00:00Z"]], + # sub-sequent starttime before overall starttime + [ + ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], + ["2022-01-31T00:00:00Z", "2024-01-01T00:00:00Z"], + ], + # sub-sequent endtime after overall endtime + [ + ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], + ["2023-01-31T00:00:00Z", "2024-01-02T00:00:00Z"], + ], ], ) def test_time_intervals_invalid(interval) -> None: From 6c03b3721bb82dc53613bdea6305301a77f5a946 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 20 May 2025 19:16:07 +0200 Subject: [PATCH 3/5] more check --- stac_pydantic/collection.py | 7 ++++++- tests/test_models.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/stac_pydantic/collection.py b/stac_pydantic/collection.py index 186375a..018bfe5 100644 --- a/stac_pydantic/collection.py +++ b/stac_pydantic/collection.py @@ -22,11 +22,16 @@ def 
validate_time_interval(v: List[TInterval]) -> List[TInterval]: # The first time interval always describes the overall temporal extent of the data. start, end = next(ivalues) if start and end: - assert start < end, f"`Start` time {start} older than `End` time {end}" + if start > end: + raise ValueError(f"`Start` time {start} older than `End` time {end}") # All subsequent time intervals can be used to provide a more precise # description of the extent and identify clusters of data. for s, e in ivalues: + if s and e: + if s > e: + raise ValueError(f"`Start` time {s} older than `End` time {e}") + if start and s: if start > s: raise ValueError( diff --git a/tests/test_models.py b/tests/test_models.py index 5b12ad5..55b9885 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -367,6 +367,11 @@ def test_item_bbox_validation() -> None: ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], ["2023-01-31T00:00:00Z", "2024-01-02T00:00:00Z"], ], + # sub-sequent starttime after sub-sequent endtime + [ + ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], + ["2023-01-31T00:00:00Z", "2023-01-01T00:00:00Z"], + ], ], ) def test_time_intervals_invalid(interval) -> None: From 3c4c0704b4ce8305f16fc73f1a967d64491002f0 Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 20 May 2025 21:16:33 +0200 Subject: [PATCH 4/5] more test and type --- .pre-commit-config.yaml | 2 +- stac_pydantic/collection.py | 22 +++++++++++++++------- tests/test_models.py | 12 ++++++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c1b70d3..0112fe7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.9.0 + rev: v1.15.0 hooks: - id: mypy language_version: python diff --git a/stac_pydantic/collection.py b/stac_pydantic/collection.py index 018bfe5..dc29a83 100644 --- a/stac_pydantic/collection.py +++ 
b/stac_pydantic/collection.py @@ -1,6 +1,6 @@ -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import AfterValidator, Field +from pydantic import AfterValidator, Field, conlist from typing_extensions import Annotated from stac_pydantic.catalog import _Catalog @@ -13,14 +13,16 @@ UtcDatetime, ) -TInterval = Tuple[Union[UtcDatetime, None], Union[UtcDatetime, None]] - -def validate_time_interval(v: List[TInterval]) -> List[TInterval]: +def validate_time_interval(v): # noqa: C901 ivalues = iter(v) # The first time interval always describes the overall temporal extent of the data. - start, end = next(ivalues) + overall_interval = next(ivalues, None) + if not overall_interval: + return v + + start, end = overall_interval if start and end: if start > end: raise ValueError(f"`Start` time {start} older than `End` time {end}") @@ -60,7 +62,13 @@ class TimeInterval(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#temporal-extent-object """ - interval: Annotated[List[TInterval], AfterValidator(validate_time_interval)] + interval: Annotated[ # type: ignore + conlist( + conlist(Union[UtcDatetime, None], min_length=2, max_length=2), + min_length=1, + ), + AfterValidator(validate_time_interval), + ] class Extent(StacBaseModel): diff --git a/tests/test_models.py b/tests/test_models.py index 55b9885..78d923d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -353,9 +353,19 @@ def test_item_bbox_validation() -> None: @pytest.mark.parametrize( "interval", [ + # interval should at least have 1 value + [], + # interval item should be an array of 2 datetime + ["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"], + # interval item should not be null + [[]], + # interval item should have 2 values + [["2023-01-01T00:00:00Z", "2023-01-02T00:00:00Z", "2024-01-01T00:00:00Z"]], + # invalid datetime values [[None, "yo"]], [["yo", 
None]], [["yo", "yo"]], + # wrong datetime order [["2024-01-01T00:00:00Z", "2023-01-01T00:00:00Z"]], # sub-sequent starttime before overall starttime [ @@ -383,6 +393,8 @@ def test_time_intervals_invalid(interval) -> None: @pytest.mark.parametrize( "interval", [ + # Open date range to both sides is supported but not recommended. + [[None, None]], [["2024-01-01T00:00:00Z", None]], [[None, "2024-01-01T00:00:00Z"]], [["2023-01-01T00:00:00Z", "2024-01-01T00:00:00Z"]], From ac134e88091cf4c2318b669ce132cbb51b42bf6a Mon Sep 17 00:00:00 2001 From: vincentsarago Date: Tue, 20 May 2025 21:51:45 +0200 Subject: [PATCH 5/5] rewrite type --- stac_pydantic/collection.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/stac_pydantic/collection.py b/stac_pydantic/collection.py index dc29a83..a5f7b14 100644 --- a/stac_pydantic/collection.py +++ b/stac_pydantic/collection.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Literal, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union from pydantic import AfterValidator, Field, conlist from typing_extensions import Annotated @@ -13,8 +13,15 @@ UtcDatetime, ) +if TYPE_CHECKING: + StartEndTime = List[Union[UtcDatetime, None]] + TInterval = List[StartEndTime] +else: + StartEndTime = conlist(Union[UtcDatetime, None], min_length=2, max_length=2) + TInterval = conlist(StartEndTime, min_length=1) -def validate_time_interval(v): # noqa: C901 + +def validate_time_interval(v: TInterval) -> TInterval: # noqa: C901 ivalues = iter(v) # The first time interval always describes the overall temporal extent of the data. 
@@ -62,13 +69,7 @@ class TimeInterval(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#temporal-extent-object """ - interval: Annotated[ # type: ignore - conlist( - conlist(Union[UtcDatetime, None], min_length=2, max_length=2), - min_length=1, - ), - AfterValidator(validate_time_interval), - ] + interval: Annotated[TInterval, AfterValidator(validate_time_interval)] class Extent(StacBaseModel):