From d8f820f44144ddff1dcca73d338e032e2b2aa3ab Mon Sep 17 00:00:00 2001 From: Vincent Privat Date: Tue, 5 Aug 2025 18:20:50 +0200 Subject: [PATCH] Support stac-check config_file --- CHANGELOG.md | 2 ++ src/stac_api_validator/__main__.py | 6 ++++ src/stac_api_validator/validations.py | 31 +++++++++++++++-- tests/resources/stac-check-config.yaml | 46 ++++++++++++++++++++++++++ tests/test_validations.py | 9 +++++ 5 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 tests/resources/stac-check-config.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index dff0233..cc88662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Support stac-check config_file ([#687](https://github.com/stac-utils/stac-api-validator/pull/687)) + ## [0.6.6] - 2025-07-29 Dependency updates. diff --git a/src/stac_api_validator/__main__.py b/src/stac_api_validator/__main__.py index db983c7..07ecbf1 100644 --- a/src/stac_api_validator/__main__.py +++ b/src/stac_api_validator/__main__.py @@ -137,6 +137,10 @@ multiple=True, help="Headers to attach to the main request and dependent pystac requests, curl syntax", ) +@click.option( + "--stac-check-config", + help="Path to a YAML stac-check configuration file", +) def main( log_level: str, root_url: str, @@ -162,6 +166,7 @@ def main( query_in_values: Optional[str] = None, transaction_collection: Optional[str] = None, headers: Optional[List[str]] = None, + stac_check_config: Optional[str] = None, ) -> int: """STAC API Validator.""" logging.basicConfig(stream=sys.stdout, level=log_level) @@ -202,6 +207,7 @@ def main( ), transaction_collection=transaction_collection, headers=processed_headers, + stac_check_config=stac_check_config, ) except Exception as e: click.secho( diff --git a/src/stac_api_validator/validations.py b/src/stac_api_validator/validations.py index 1c126dd..8589876 100644 --- a/src/stac_api_validator/validations.py +++ b/src/stac_api_validator/validations.py @@ -375,10 +375,16 @@ def stac_check( method: Method = Method.GET, open_assets_urls: bool = True, headers: Optional[dict] = None, + config_file: Optional[str] = None, ) -> None: try: logger.debug(f"stac-check validation: {url}") - linter = Linter(url, assets_open_urls=open_assets_urls, headers=headers or {}) + linter = Linter( + url, + config_file=config_file, + assets_open_urls=open_assets_urls, + headers=headers or {}, + ) if not linter.valid_stac: errors += f"[{context}] : {method} {url} is not a valid STAC object: {linter.error_msg}" if msgs := linter.best_practices_msg[1:]: # first msg is a header, so skip @@ -560,6 +566,7 @@ def validate_api( transaction_collection: Optional[str], headers: Optional[Dict[str, str]], open_assets_urls: bool = True, + stac_check_config: Optional[str] = None, ) -> Tuple[Warnings, Errors]: warnings = Warnings() errors = Errors() @@ -611,7 +618,13 @@ def validate_api( if "collections" in ccs_to_validate: logger.info("Validating STAC API - Collections conformance class.") validate_collections( - landing_page_body, collection, errors, warnings, r_session, open_assets_urls + landing_page_body, + collection, + errors, + warnings, + r_session, + open_assets_urls, + stac_check_config, ) conforms_to = landing_page_body.get("conformsTo", []) @@ -619,7 +632,13 @@ def validate_api( if "features" in ccs_to_validate: logger.info("Validating STAC API - Features conformance class.") validate_collections( - landing_page_body, collection, errors, warnings, r_session, open_assets_urls + landing_page_body, + collection, + errors, + warnings, + r_session, + open_assets_urls, + stac_check_config, ) validate_features( landing_page_body, @@ -631,6 +650,7 @@ def validate_api( r_session, validate_pagination, open_assets_urls, + stac_check_config, ) if "transaction" in ccs_to_validate: @@ -982,6 +1002,7 @@ def validate_collections( warnings: Warnings, r_session: Session, open_assets_urls: bool = True, + stac_check_config: Optional[str] = None, ) -> None: if not (data_link := link_by_rel(root_body["links"], "data")): errors += f"[{Context.COLLECTIONS}] /: Link[rel=data] must href /collections" @@ -1091,6 +1112,7 @@ def validate_collections( Method.GET, open_assets_urls, r_session.headers, + stac_check_config, ) # todo: collection pagination @@ -1106,6 +1128,7 @@ def validate_features( r_session: Session, validate_pagination: bool, open_assets_urls: bool = True, + stac_check_config: Optional[str] = None, ) -> None: if not geometry: errors += f"[{Context.FEATURES}] Geometry parameter required for running Features validations." @@ -1216,6 +1239,7 @@ def validate_features( Method.GET, open_assets_urls, r_session.headers, + stac_check_config, ) # Validate Features non-existent item @@ -1326,6 +1350,7 @@ def validate_features( Method.GET, open_assets_urls, r_session.headers, + stac_check_config, ) if validate_pagination: diff --git a/tests/resources/stac-check-config.yaml b/tests/resources/stac-check-config.yaml new file mode 100644 index 0000000..d48bf8b --- /dev/null +++ b/tests/resources/stac-check-config.yaml @@ -0,0 +1,46 @@ +linting: + # Identifiers should consist of only lowercase characters, numbers, '_', and '-' + searchable_identifiers: true + # Item name '{self.object_id}' should not contain ':' or '/' + percent_encoded: true + # Item file names should match their ids + item_id_file_name: true + # Collections and catalogs should be named collection.json and catalog.json + catalog_id_file_name: true + # A STAC collection should contain a summaries field + check_summaries: true + # Datetime fields should not be set to null + null_datetime: true + # best practices - check unlocated items to make sure bbox field is not set + check_unlocated: true + # best practices - recommend items have a geometry + check_geometry: true + # check to see if there are too many links + bloated_links: true + # best practices - check for bloated metadata in properties + bloated_metadata: true + # best practices - ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"] + check_thumbnail: true + # best practices - ensure that links in catalogs and collections include a title field + links_title: true + # best practices - ensure that links in catalogs and collections include self link + links_self: true + +# Geometry validation settings [BETA] +geometry_validation: + # Master switch to enable/disable all geometry validation checks + enabled: true + # check if geometry coordinates are potentially ordered incorrectly (longitude, latitude) + geometry_coordinates_order: true + # check if geometry coordinates contain definite errors (latitude > ±90°, longitude > ±180°) + geometry_coordinates_definite_errors: true + # check if bbox matches the bounds of the geometry + bbox_geometry_match: true + # check if a bbox that crosses the antimeridian is correctly formatted + bbox_antimeridian: true + +settings: + # number of links before the bloated links warning is shown + max_links: 20 + # number of properties before the bloated metadata warning is shown + max_properties: 20 diff --git a/tests/test_validations.py b/tests/test_validations.py index ab7afe2..96846a4 100644 --- a/tests/test_validations.py +++ b/tests/test_validations.py @@ -44,6 +44,13 @@ def sample_item() -> Generator[pystac.Item, None, None]: yield pystac.Item.from_dict(data) +@pytest.fixture +def stac_check_config() -> Generator[str, None, None]: + current_path = pathlib.Path(os.path.dirname(os.path.abspath(__file__))) + + yield current_path / "resources" / "stac-check-config.yaml" + + @pytest.fixture def expected_headers(requests_version: str) -> Generator[Dict[str, str], None, None]: yield { @@ -91,6 +98,7 @@ def test_validate_api( request: pytest.FixtureRequest, r_session: requests.Session, expected_headers: Dict[str, str], + stac_check_config: str, ) -> None: if request.config.getoption("typeguard_packages"): pytest.skip( @@ -114,6 +122,7 @@ def test_validate_api( query_config=None, transaction_collection=None, headers=headers, + stac_check_config=stac_check_config, ) assert retrieve_mock.call_count == 1 r_session = retrieve_mock.call_args.args[-1]