From c488a390e8ab19a3c8267b0f70f7b1802a1cea95 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 14:26:06 +0100 Subject: [PATCH 01/12] feat: add queryables cache and optional validation for search parameters --- stac_fastapi/core/stac_fastapi/core/core.py | 15 ++ .../stac_fastapi/sfeos_helpers/queryables.py | 131 +++++++++++++ .../tests/api/test_api_query_validation.py | 67 +++++++ stac_fastapi/tests/data/test_collection.json | 5 + .../tests/sfeos_helpers/test_queryables.py | 178 ++++++++++++++++++ 5 files changed, 396 insertions(+) create mode 100644 stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py create mode 100644 stac_fastapi/tests/api/test_api_query_validation.py create mode 100644 stac_fastapi/tests/sfeos_helpers/test_queryables.py diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56d2333fc..f4d1075f2 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -39,6 +39,11 @@ BulkTransactionMethod, Items, ) +from stac_fastapi.sfeos_helpers.queryables import ( + get_properties_from_cql2_filter, + initialize_queryables_cache, + validate_queryables, +) from stac_fastapi.types import stac as stac_types from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES from stac_fastapi.types.core import AsyncBaseCoreClient @@ -88,6 +93,10 @@ class CoreClient(AsyncBaseCoreClient): title: str = attr.ib(default="stac-fastapi") description: str = attr.ib(default="stac-fastapi") + def __attrs_post_init__(self): + """Initialize the queryables cache.""" + initialize_queryables_cache(self.database) + def _landing_page( self, base_url: str, @@ -815,6 +824,8 @@ async def post_search( ) if hasattr(search_request, "query") and getattr(search_request, "query"): + query_fields = set(getattr(search_request, "query").keys()) + await validate_queryables(query_fields) for field_name, expr in getattr(search_request, 
"query").items(): field = "properties__" + field_name for op, value in expr.items(): @@ -833,7 +844,11 @@ async def post_search( if cql2_filter is not None: try: + query_fields = get_properties_from_cql2_filter(cql2_filter) + await validate_queryables(query_fields) search = await self.database.apply_cql2_filter(search, cql2_filter) + except HTTPException: + raise except Exception as e: raise HTTPException( status_code=400, detail=f"Error with cql2 filter: {e}" diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py new file mode 100644 index 000000000..fb1065b3e --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -0,0 +1,131 @@ +"""A module for managing queryable attributes.""" + +import asyncio +import os +import time +from typing import Any, Dict, List, Optional, Set + +from fastapi import HTTPException + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic + + +class QueryablesCache: + """A thread-safe, time-based cache for queryable properties.""" + + def __init__(self, database_logic: Any): + """ + Initialize the QueryablesCache. + + Args: + database_logic: An instance of a class with a `get_queryables_mapping` method. 
+ """ + self._db_logic = database_logic + self._cache: Dict[str, List[str]] = {} + self._all_queryables: Set[str] = set() + self._last_updated: float = 0 + self._lock = asyncio.Lock() + self.validation_enabled: bool = False + self.cache_ttl: int = 3600 # How often to refresh cache (in seconds) + self.reload_settings() + + def reload_settings(self): + """Reload settings from environment variables.""" + self.validation_enabled = ( + os.getenv("VALIDATE_QUERYABLES", "false").lower() == "true" + ) + self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "3600")) + + async def _update_cache(self): + """Update the cache with the latest queryables from the database.""" + if not self.validation_enabled: + return + + async with self._lock: + if (time.time() - self._last_updated < self.cache_ttl) and self._cache: + return + + queryables_mapping = await self._db_logic.get_queryables_mapping() + all_queryables_set = set(queryables_mapping.keys()) + + self._all_queryables = all_queryables_set + + self._cache = {"*": list(all_queryables_set)} + self._last_updated = time.time() + + async def get_all_queryables(self) -> Set[str]: + """ + Return a set of all queryable attributes across all collections. + + This method will update the cache if it's stale or has been cleared. + """ + if not self.validation_enabled: + return set() + + if (time.time() - self._last_updated >= self.cache_ttl) or not self._cache: + await self._update_cache() + return self._all_queryables + + async def validate(self, fields: Set[str]) -> None: + """ + Validate if the provided fields are queryable. + + Raises HTTPException if invalid fields are found. + """ + if not self.validation_enabled: + return + + allowed_fields = await self.get_all_queryables() + invalid_fields = fields - allowed_fields + if invalid_fields: + raise HTTPException( + status_code=400, + detail=f"Invalid query fields: {', '.join(invalid_fields)}. 
Allowed fields are: {', '.join(allowed_fields)}", + ) + + +_queryables_cache_instance: Optional[QueryablesCache] = None + + +def initialize_queryables_cache(database_logic: BaseDatabaseLogic): + """ + Initialize the global queryables cache. + + :param database_logic: An instance of DatabaseLogic. + """ + global _queryables_cache_instance + if _queryables_cache_instance is None: + _queryables_cache_instance = QueryablesCache(database_logic) + + +async def all_queryables() -> Set[str]: + """Get all queryable properties from the cache.""" + if _queryables_cache_instance is None: + raise Exception("Queryables cache not initialized.") + return await _queryables_cache_instance.get_all_queryables() + + +async def validate_queryables(fields: Set[str]) -> None: + """Validate if the provided fields are queryable.""" + if _queryables_cache_instance is None: + return + await _queryables_cache_instance.validate(fields) + + +def reload_queryables_settings(): + """Reload queryables settings from environment variables.""" + if _queryables_cache_instance: + _queryables_cache_instance.reload_settings() + + +def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: + """Recursively extract property names from a CQL2 filter.""" + props: Set[str] = set() + if "op" in cql2_filter and "args" in cql2_filter: + for arg in cql2_filter["args"]: + if isinstance(arg, dict): + if "op" in arg: + props.update(get_properties_from_cql2_filter(arg)) + elif "property" in arg: + props.add(arg["property"]) + return props diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py new file mode 100644 index 000000000..e926699b5 --- /dev/null +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -0,0 +1,67 @@ +import json +import os +from unittest import mock + +import pytest + +from stac_fastapi.sfeos_helpers.queryables import reload_queryables_settings + + +@pytest.fixture(autouse=True) +def enable_validation(): + 
with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): + reload_queryables_settings() + yield + reload_queryables_settings() + + +@pytest.mark.asyncio +async def test_search_post_query_valid_param(app_client, ctx): + """Test POST /search with a valid query parameter""" + query = {"query": {"eo:cloud_cover": {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_search_post_query_invalid_param(app_client, ctx): + """Test POST /search with an invalid query parameter""" + query = {"query": {"invalid_param": {"eq": "test"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_valid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with a valid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "<", + "args": [{"property": "eo:cloud_cover"}, 10], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_invalid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with an invalid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "=", + "args": [{"property": "invalid_param"}, "test"], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] diff --git a/stac_fastapi/tests/data/test_collection.json 
b/stac_fastapi/tests/data/test_collection.json index 32a7d254b..dda5b8a0e 100644 --- a/stac_fastapi/tests/data/test_collection.json +++ b/stac_fastapi/tests/data/test_collection.json @@ -6,6 +6,11 @@ "type": "Collection", "description": "Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.", "stac_version": "1.0.0", + "queryables": { + "eo:cloud_cover": { + "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover" + } + }, "license": "PDDL-1.0", "summaries": { "platform": [ diff --git a/stac_fastapi/tests/sfeos_helpers/test_queryables.py b/stac_fastapi/tests/sfeos_helpers/test_queryables.py new file mode 100644 index 000000000..3ff882254 --- /dev/null +++ b/stac_fastapi/tests/sfeos_helpers/test_queryables.py @@ -0,0 +1,178 @@ +import os +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi import HTTPException + +import stac_fastapi.sfeos_helpers.queryables as queryables_module +from stac_fastapi.sfeos_helpers.queryables import ( + QueryablesCache, + all_queryables, + get_properties_from_cql2_filter, + initialize_queryables_cache, + reload_queryables_settings, + validate_queryables, +) + + +class TestQueryablesCache: + @pytest.fixture + def mock_db_logic(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock( + return_value={"prop1": "type1", "prop2": "type2"} + ) + return db_logic + + @pytest.fixture + def queryables_cache(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "60"} + ): + cache = QueryablesCache(mock_db_logic) + return cache + + def test_init(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "120"} + ): + cache = QueryablesCache(mock_db_logic) + assert cache.validation_enabled is True + assert cache.cache_ttl == 
120 + + def test_reload_settings(self, queryables_cache): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "false", "QUERYABLES_CACHE_TTL": "300"} + ): + queryables_cache.reload_settings() + assert queryables_cache.validation_enabled is False + assert queryables_cache.cache_ttl == 300 + + @pytest.mark.asyncio + async def test_get_all_queryables_updates_cache( + self, queryables_cache, mock_db_logic + ): + queryables = await queryables_cache.get_all_queryables() + assert queryables == {"prop1", "prop2"} + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def test_get_all_queryables_uses_cache(self, queryables_cache, mock_db_logic): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Should use cache now + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def test_get_all_queryables_refresh_after_ttl( + self, queryables_cache, mock_db_logic + ): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Simulate time passing + queryables_cache._last_updated = time.time() - queryables_cache.cache_ttl - 1 + + await queryables_cache.get_all_queryables() + assert mock_db_logic.get_queryables_mapping.call_count == 2 + + @pytest.mark.asyncio + async def test_get_all_queryables_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + queryables = await queryables_cache.get_all_queryables() + assert queryables == set() + + @pytest.mark.asyncio + async def test_validate_valid_fields(self, queryables_cache): + await queryables_cache.validate({"prop1"}) + + @pytest.mark.asyncio + async def test_validate_invalid_fields(self, queryables_cache): + with pytest.raises(HTTPException) as excinfo: + await queryables_cache.validate({"invalid_prop"}) + assert excinfo.value.status_code == 400 + assert "Invalid query 
fields: invalid_prop" in str(excinfo.value.detail) + + @pytest.mark.asyncio + async def test_validate_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + await queryables_cache.validate({"invalid_prop"}) + + +class TestGlobalFunctions: + @pytest.fixture(autouse=True) + def reset_global_cache(self): + original = queryables_module._queryables_cache_instance + queryables_module._queryables_cache_instance = None + yield + + queryables_module._queryables_cache_instance = original + + def test_initialize_queryables_cache(self): + db_logic = MagicMock() + initialize_queryables_cache(db_logic) + assert queryables_module._queryables_cache_instance is not None + assert queryables_module._queryables_cache_instance._db_logic == db_logic + + @pytest.mark.asyncio + async def test_all_queryables_not_initialized(self): + with pytest.raises(Exception) as excinfo: + await all_queryables() + assert "Queryables cache not initialized" in str(excinfo.value) + + @pytest.mark.asyncio + async def test_all_queryables(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) + initialize_queryables_cache(db_logic) + + queryables_module._queryables_cache_instance.validation_enabled = True + + res = await all_queryables() + assert res == {"p1"} + + @pytest.mark.asyncio + async def test_validate_queryables(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) + initialize_queryables_cache(db_logic) + queryables_module._queryables_cache_instance.validation_enabled = True + + await validate_queryables({"p1"}) + + with pytest.raises(HTTPException): + await validate_queryables({"invalid"}) + + def test_reload_queryables_settings(self): + db_logic = MagicMock() + initialize_queryables_cache(db_logic) + + with patch.dict(os.environ, {"VALIDATE_QUERYABLES": "false"}): + reload_queryables_settings() + assert ( + queryables_module._queryables_cache_instance.validation_enabled 
is False + ) + + +def test_get_properties_from_cql2_filter(): + # Simple prop + cql2 = {"op": "=", "args": [{"property": "prop1"}, "value"]} + props = get_properties_from_cql2_filter(cql2) + assert props == {"prop1"} + + # Nested props + cql2_nested = { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "prop1"}, "v1"]}, + {"op": "<", "args": [{"property": "prop2"}, 10]}, + ], + } + props = get_properties_from_cql2_filter(cql2_nested) + assert props == {"prop1", "prop2"} + + # Empty/invalid + assert get_properties_from_cql2_filter({}) == set() From 7434493db28b7a4d0a9c258d2e31c3f46be150f4 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 14:56:36 +0100 Subject: [PATCH 02/12] fix: remove unnecessary info from error message for invalid query fields in QueryablesCache --- .../sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py index fb1065b3e..2eb4d99c7 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -80,7 +80,7 @@ async def validate(self, fields: Set[str]) -> None: if invalid_fields: raise HTTPException( status_code=400, - detail=f"Invalid query fields: {', '.join(invalid_fields)}. 
Allowed fields are: {', '.join(allowed_fields)}", + detail=f"Invalid query fields: {', '.join(invalid_fields)}.", ) From 15ad1b9f1922a5be5f25c9db07b46040a069f2c6 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 19 Nov 2025 15:19:21 +0100 Subject: [PATCH 03/12] feat: add queryables validation and cache configuration to README --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index f2a7f498e..6b041f942 100644 --- a/README.md +++ b/README.md @@ -369,6 +369,8 @@ You can customize additional settings in your `.env` file: | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | +| `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | +| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. 
| `3600` | Optional | > [!NOTE] @@ -424,6 +426,28 @@ EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,pro - Excluded fields and their nested children will be skipped during field traversal - Both the field itself and any nested properties will be excluded +## Queryables Validation + +SFEOS supports validating query parameters against the collection's defined queryables. This ensures that users only query fields that are explicitly exposed and indexed. + +**Configuration:** + +To enable queryables validation, set the following environment variables: + +```bash +VALIDATE_QUERYABLES=true +QUERYABLES_CACHE_TTL=3600 # Optional, defaults to 3600 seconds (1 hour) +``` + +**Behavior:** + +- When enabled, the API maintains a cache of all queryable fields across all collections. +- Search requests (both GET and POST) are checked against this cache. +- If a request contains a query parameter or filter field that is not in the list of allowed queryables, the API returns a `400 Bad Request` error with a message indicating the invalid field(s). +- The cache is automatically refreshed based on the `QUERYABLES_CACHE_TTL` setting. + +This feature helps prevent queries on unindexed fields which could lead to poor performance or unexpected results. + ## Datetime-Based Index Management ### Overview From 5de02fcb14af9152020f9aaae32dbd7e7f814f76 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Thu, 20 Nov 2025 09:46:43 +0100 Subject: [PATCH 04/12] Update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e66e2b972..124568b61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. 
When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility.[#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + +- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `3600` seconds (1 hour) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + - Added retry with back-off logic for Redis related functions. [#528](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/528) + - Added nanosecond precision datetime filtering that ensures nanosecond precision support in filtering by datetime. This is configured via the `USE_DATETIME_NANOS` environment variable, while maintaining microseconds compatibility for datetime precision. [#529](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/529) ### Changed From 3c3b5cb6cf83ec3e5574edabe85b6cbc1b34f028 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Thu, 27 Nov 2025 11:04:50 +0100 Subject: [PATCH 05/12] feat: add support for excluding queryables from validation --- README.md | 3 +- .../stac_fastapi/sfeos_helpers/queryables.py | 15 ++++++++++ .../tests/api/test_api_query_validation.py | 29 +++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b041f942..19f184d8a 100644 --- a/README.md +++ b/README.md @@ -445,8 +445,9 @@ QUERYABLES_CACHE_TTL=3600 # Optional, defaults to 3600 seconds (1 hour) - Search requests (both GET and POST) are checked against this cache. - If a request contains a query parameter or filter field that is not in the list of allowed queryables, the API returns a `400 Bad Request` error with a message indicating the invalid field(s). 
- The cache is automatically refreshed based on the `QUERYABLES_CACHE_TTL` setting. +- **Interaction with `EXCLUDED_FROM_QUERYABLES`**: If `VALIDATE_QUERYABLES` is enabled, fields listed in `EXCLUDED_FROM_QUERYABLES` will also be considered invalid for filtering. This effectively enforces the exclusion of these fields from search queries. -This feature helps prevent queries on unindexed fields which could lead to poor performance or unexpected results. +This feature helps prevent queries on non-queryable fields which could lead to unnecessary load on the database. ## Datetime-Based Index Management diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py index 2eb4d99c7..c7fa56b32 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py @@ -27,6 +27,7 @@ def __init__(self, database_logic: Any): self._lock = asyncio.Lock() self.validation_enabled: bool = False self.cache_ttl: int = 3600 # How often to refresh cache (in seconds) + self.excluded_queryables: Set[str] = set() self.reload_settings() def reload_settings(self): @@ -36,6 +37,17 @@ def reload_settings(self): ) self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "3600")) + excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") + self.excluded_queryables = set() + if excluded: + for field in excluded.split(","): + field = field.strip() + if field: + # Remove 'properties.' 
prefix if present + if field.startswith("properties."): + field = field[11:] + self.excluded_queryables.add(field) + async def _update_cache(self): """Update the cache with the latest queryables from the database.""" if not self.validation_enabled: @@ -48,6 +60,9 @@ async def _update_cache(self): queryables_mapping = await self._db_logic.get_queryables_mapping() all_queryables_set = set(queryables_mapping.keys()) + if self.excluded_queryables: + all_queryables_set = all_queryables_set - self.excluded_queryables + self._all_queryables = all_queryables_set self._cache = {"*": list(all_queryables_set)} diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py index e926699b5..ba8239003 100644 --- a/stac_fastapi/tests/api/test_api_query_validation.py +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -65,3 +65,32 @@ async def test_item_collection_get_filter_invalid_param(app_client, ctx): assert resp.status_code == 400 resp_json = resp.json() assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_validate_queryables_excluded(app_client, ctx): + """Test that excluded queryables are rejected when validation is enabled.""" + + excluded_field = "eo:cloud_cover" + + with mock.patch.dict( + os.environ, + { + "VALIDATE_QUERYABLES": "true", + "EXCLUDED_FROM_QUERYABLES": excluded_field, + "QUERYABLES_CACHE_TTL": "0", + }, + ): + reload_queryables_settings() + + query = {"query": {excluded_field: {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + assert "Invalid query fields" in resp.json()["detail"] + assert excluded_field in resp.json()["detail"] + + query = {"query": {"id": {"eq": "test-item"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + reload_queryables_settings() From a46665e63c4d0a77723372541948df9bc320f762 Mon Sep 17 00:00:00 2001 From: Andrzej 
Pijanowski Date: Mon, 1 Dec 2025 10:57:43 +0100 Subject: [PATCH 06/12] feat: move queryables cache and validation logic to core --- README.md | 2 +- .../stac_fastapi/core/base_database_logic.py | 5 ++ stac_fastapi/core/stac_fastapi/core/core.py | 15 +++-- .../stac_fastapi/core}/queryables.py | 38 +----------- .../tests/api/test_api_query_validation.py | 30 +++++++-- .../test_queryables.py | 62 +------------------ 6 files changed, 40 insertions(+), 112 deletions(-) rename stac_fastapi/{sfeos_helpers/stac_fastapi/sfeos_helpers => core/stac_fastapi/core}/queryables.py (76%) rename stac_fastapi/tests/{sfeos_helpers => core}/test_queryables.py (64%) diff --git a/README.md b/README.md index 19f184d8a..36ca2b3df 100644 --- a/README.md +++ b/README.md @@ -367,7 +367,7 @@ You can customize additional settings in your `.env` file: | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | -| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | +| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. 
Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. If `VALIDATE_QUERYABLES` is enabled, these fields will also be considered invalid for filtering. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | | `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | | `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. | `3600` | Optional | diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index 105fdf925..1ed5265b7 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -138,3 +138,8 @@ async def delete_collection( ) -> None: """Delete a collection from the database.""" pass + + @abc.abstractmethod + async def get_queryables_mapping(self, collection_id: str = "*") -> Dict[str, Any]: + """Retrieve mapping of Queryables for search.""" + pass diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56bff9b28..fad0a979f 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -24,6 +24,10 @@ from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.core.datetime_utils import format_datetime_range from stac_fastapi.core.models.links import PagingLinks +from stac_fastapi.core.queryables import ( + QueryablesCache, + 
get_properties_from_cql2_filter, +) from stac_fastapi.core.redis_utils import redis_pagination_links from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer from stac_fastapi.core.session import Session @@ -39,11 +43,6 @@ BulkTransactionMethod, Items, ) -from stac_fastapi.sfeos_helpers.queryables import ( - get_properties_from_cql2_filter, - initialize_queryables_cache, - validate_queryables, -) from stac_fastapi.types import stac as stac_types from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES from stac_fastapi.types.core import AsyncBaseCoreClient @@ -95,7 +94,7 @@ class CoreClient(AsyncBaseCoreClient): def __attrs_post_init__(self): """Initialize the queryables cache.""" - initialize_queryables_cache(self.database) + self.queryables_cache = QueryablesCache(self.database) def _landing_page( self, @@ -826,7 +825,7 @@ async def post_search( if hasattr(search_request, "query") and getattr(search_request, "query"): query_fields = set(getattr(search_request, "query").keys()) - await validate_queryables(query_fields) + await self.queryables_cache.validate(query_fields) for field_name, expr in getattr(search_request, "query").items(): field = "properties__" + field_name for op, value in expr.items(): @@ -846,7 +845,7 @@ async def post_search( if cql2_filter is not None: try: query_fields = get_properties_from_cql2_filter(cql2_filter) - await validate_queryables(query_fields) + await self.queryables_cache.validate(query_fields) search = await self.database.apply_cql2_filter(search, cql2_filter) except HTTPException: raise diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py b/stac_fastapi/core/stac_fastapi/core/queryables.py similarity index 76% rename from stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py rename to stac_fastapi/core/stac_fastapi/core/queryables.py index c7fa56b32..63c63ba8f 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/queryables.py +++ 
b/stac_fastapi/core/stac_fastapi/core/queryables.py @@ -3,12 +3,10 @@ import asyncio import os import time -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, List, Set from fastapi import HTTPException -from stac_fastapi.core.base_database_logic import BaseDatabaseLogic - class QueryablesCache: """A thread-safe, time-based cache for queryable properties.""" @@ -99,40 +97,6 @@ async def validate(self, fields: Set[str]) -> None: ) -_queryables_cache_instance: Optional[QueryablesCache] = None - - -def initialize_queryables_cache(database_logic: BaseDatabaseLogic): - """ - Initialize the global queryables cache. - - :param database_logic: An instance of DatabaseLogic. - """ - global _queryables_cache_instance - if _queryables_cache_instance is None: - _queryables_cache_instance = QueryablesCache(database_logic) - - -async def all_queryables() -> Set[str]: - """Get all queryable properties from the cache.""" - if _queryables_cache_instance is None: - raise Exception("Queryables cache not initialized.") - return await _queryables_cache_instance.get_all_queryables() - - -async def validate_queryables(fields: Set[str]) -> None: - """Validate if the provided fields are queryable.""" - if _queryables_cache_instance is None: - return - await _queryables_cache_instance.validate(fields) - - -def reload_queryables_settings(): - """Reload queryables settings from environment variables.""" - if _queryables_cache_instance: - _queryables_cache_instance.reload_settings() - - def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: """Recursively extract property names from a CQL2 filter.""" props: Set[str] = set() diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py index ba8239003..a071a0294 100644 --- a/stac_fastapi/tests/api/test_api_query_validation.py +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -4,15 +4,34 @@ import pytest -from 
stac_fastapi.sfeos_helpers.queryables import reload_queryables_settings +if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config +else: + from stac_fastapi.elasticsearch.app import app_config + + +def get_core_client(): + if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config + else: + from stac_fastapi.elasticsearch.app import app_config + return app_config["client"] + + +def reload_queryables_settings(): + client = get_core_client() + if hasattr(client, "queryables_cache"): + client.queryables_cache.reload_settings() @pytest.fixture(autouse=True) def enable_validation(): + + client = app_config["client"] with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): - reload_queryables_settings() + client.queryables_cache.reload_settings() yield - reload_queryables_settings() + client.queryables_cache.reload_settings() @pytest.mark.asyncio @@ -72,6 +91,7 @@ async def test_validate_queryables_excluded(app_client, ctx): """Test that excluded queryables are rejected when validation is enabled.""" excluded_field = "eo:cloud_cover" + client = app_config["client"] with mock.patch.dict( os.environ, @@ -81,7 +101,7 @@ async def test_validate_queryables_excluded(app_client, ctx): "QUERYABLES_CACHE_TTL": "0", }, ): - reload_queryables_settings() + client.queryables_cache.reload_settings() query = {"query": {excluded_field: {"lt": 10}}} resp = await app_client.post("/search", json=query) @@ -93,4 +113,4 @@ async def test_validate_queryables_excluded(app_client, ctx): resp = await app_client.post("/search", json=query) assert resp.status_code == 200 - reload_queryables_settings() + client.queryables_cache.reload_settings() diff --git a/stac_fastapi/tests/sfeos_helpers/test_queryables.py b/stac_fastapi/tests/core/test_queryables.py similarity index 64% rename from stac_fastapi/tests/sfeos_helpers/test_queryables.py rename to 
stac_fastapi/tests/core/test_queryables.py index 3ff882254..10a742049 100644 --- a/stac_fastapi/tests/sfeos_helpers/test_queryables.py +++ b/stac_fastapi/tests/core/test_queryables.py @@ -5,14 +5,9 @@ import pytest from fastapi import HTTPException -import stac_fastapi.sfeos_helpers.queryables as queryables_module -from stac_fastapi.sfeos_helpers.queryables import ( +from stac_fastapi.core.queryables import ( QueryablesCache, - all_queryables, get_properties_from_cql2_filter, - initialize_queryables_cache, - reload_queryables_settings, - validate_queryables, ) @@ -102,61 +97,6 @@ async def test_validate_disabled(self, queryables_cache): await queryables_cache.validate({"invalid_prop"}) -class TestGlobalFunctions: - @pytest.fixture(autouse=True) - def reset_global_cache(self): - original = queryables_module._queryables_cache_instance - queryables_module._queryables_cache_instance = None - yield - - queryables_module._queryables_cache_instance = original - - def test_initialize_queryables_cache(self): - db_logic = MagicMock() - initialize_queryables_cache(db_logic) - assert queryables_module._queryables_cache_instance is not None - assert queryables_module._queryables_cache_instance._db_logic == db_logic - - @pytest.mark.asyncio - async def test_all_queryables_not_initialized(self): - with pytest.raises(Exception) as excinfo: - await all_queryables() - assert "Queryables cache not initialized" in str(excinfo.value) - - @pytest.mark.asyncio - async def test_all_queryables(self): - db_logic = MagicMock() - db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) - initialize_queryables_cache(db_logic) - - queryables_module._queryables_cache_instance.validation_enabled = True - - res = await all_queryables() - assert res == {"p1"} - - @pytest.mark.asyncio - async def test_validate_queryables(self): - db_logic = MagicMock() - db_logic.get_queryables_mapping = AsyncMock(return_value={"p1": "t1"}) - initialize_queryables_cache(db_logic) - 
queryables_module._queryables_cache_instance.validation_enabled = True - - await validate_queryables({"p1"}) - - with pytest.raises(HTTPException): - await validate_queryables({"invalid"}) - - def test_reload_queryables_settings(self): - db_logic = MagicMock() - initialize_queryables_cache(db_logic) - - with patch.dict(os.environ, {"VALIDATE_QUERYABLES": "false"}): - reload_queryables_settings() - assert ( - queryables_module._queryables_cache_instance.validation_enabled is False - ) - - def test_get_properties_from_cql2_filter(): # Simple prop cql2 = {"op": "=", "args": [{"property": "prop1"}, "value"]} From 93c6e78d1345f9125c2424a2634494bb16d80dc4 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 2 Dec 2025 11:04:09 +0100 Subject: [PATCH 07/12] fix: update QUERYABLES_CACHE_TTL default value to 21600 seconds (6 hours) --- CHANGELOG.md | 2 +- README.md | 4 ++-- stac_fastapi/core/stac_fastapi/core/queryables.py | 4 ++-- .../sfeos_helpers/search_engine/selection/cache_manager.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3c619d02..326e9e604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility.[#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) -- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `3600` seconds (1 hour) to balance performance and freshness of queryables data. 
[#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) +- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `21600` seconds (6 hours) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) ### Changed diff --git a/README.md b/README.md index 36ca2b3df..e4a8e0690 100644 --- a/README.md +++ b/README.md @@ -370,7 +370,7 @@ You can customize additional settings in your `.env` file: | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. If `VALIDATE_QUERYABLES` is enabled, these fields will also be considered invalid for filtering. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | | `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | -| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. | `3600` | Optional | +| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. 
| `21600` | Optional | > [!NOTE] @@ -436,7 +436,7 @@ To enable queryables validation, set the following environment variables: ```bash VALIDATE_QUERYABLES=true -QUERYABLES_CACHE_TTL=3600 # Optional, defaults to 3600 seconds (1 hour) +QUERYABLES_CACHE_TTL=21600 # Optional, defaults to 21600 seconds (6 hours) ``` **Behavior:** diff --git a/stac_fastapi/core/stac_fastapi/core/queryables.py b/stac_fastapi/core/stac_fastapi/core/queryables.py index 63c63ba8f..e97125c8c 100644 --- a/stac_fastapi/core/stac_fastapi/core/queryables.py +++ b/stac_fastapi/core/stac_fastapi/core/queryables.py @@ -24,7 +24,7 @@ def __init__(self, database_logic: Any): self._last_updated: float = 0 self._lock = asyncio.Lock() self.validation_enabled: bool = False - self.cache_ttl: int = 3600 # How often to refresh cache (in seconds) + self.cache_ttl: int = 21600 # How often to refresh cache (in seconds) self.excluded_queryables: Set[str] = set() self.reload_settings() @@ -33,7 +33,7 @@ def reload_settings(self): self.validation_enabled = ( os.getenv("VALIDATE_QUERYABLES", "false").lower() == "true" ) - self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "3600")) + self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "21600")) excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") self.excluded_queryables = set() diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py index 3b65244d4..364f4a09b 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py @@ -12,7 +12,7 @@ class IndexCacheManager: """Manages caching of index aliases with expiration.""" - def __init__(self, cache_ttl_seconds: int = 3600): + def __init__(self, cache_ttl_seconds: int = 21600): """Initialize the cache manager. 
Args: From 405a4dff0261ff9332af7ee6b08cd66281339a36 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 2 Dec 2025 12:53:58 +0100 Subject: [PATCH 08/12] fix: update QUERYABLES_CACHE_TTL default value to 1800 seconds (30 minutes) --- CHANGELOG.md | 2 +- README.md | 4 ++-- stac_fastapi/core/stac_fastapi/core/queryables.py | 4 ++-- .../sfeos_helpers/search_engine/selection/cache_manager.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 326e9e604..24dd6979c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility.[#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) -- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `21600` seconds (6 hours) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) +- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `1800` seconds (30 minutes) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) ### Changed diff --git a/README.md b/README.md index e4a8e0690..4279577c4 100644 --- a/README.md +++ b/README.md @@ -370,7 +370,7 @@ You can customize additional settings in your `.env` file: | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. 
Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. If `VALIDATE_QUERYABLES` is enabled, these fields will also be considered invalid for filtering. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | | `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | -| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. | `21600` | Optional | +| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. 
| `1800` | Optional | > [!NOTE] @@ -436,7 +436,7 @@ To enable queryables validation, set the following environment variables: ```bash VALIDATE_QUERYABLES=true -QUERYABLES_CACHE_TTL=21600 # Optional, defaults to 21600 seconds (6 hours) +QUERYABLES_CACHE_TTL=1800 # Optional, defaults to 1800 seconds (30 minutes) ``` **Behavior:** diff --git a/stac_fastapi/core/stac_fastapi/core/queryables.py b/stac_fastapi/core/stac_fastapi/core/queryables.py index e97125c8c..90c28717b 100644 --- a/stac_fastapi/core/stac_fastapi/core/queryables.py +++ b/stac_fastapi/core/stac_fastapi/core/queryables.py @@ -24,7 +24,7 @@ def __init__(self, database_logic: Any): self._last_updated: float = 0 self._lock = asyncio.Lock() self.validation_enabled: bool = False - self.cache_ttl: int = 21600 # How often to refresh cache (in seconds) + self.cache_ttl: int = 1800 # How often to refresh cache (in seconds) self.excluded_queryables: Set[str] = set() self.reload_settings() @@ -33,7 +33,7 @@ def reload_settings(self): self.validation_enabled = ( os.getenv("VALIDATE_QUERYABLES", "false").lower() == "true" ) - self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "21600")) + self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "1800")) excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") self.excluded_queryables = set() diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py index 364f4a09b..fb11906c3 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/search_engine/selection/cache_manager.py @@ -12,7 +12,7 @@ class IndexCacheManager: """Manages caching of index aliases with expiration.""" - def __init__(self, cache_ttl_seconds: int = 21600): + def __init__(self, cache_ttl_seconds: int = 1800): """Initialize the cache manager. 
Args: From 1b586e42a2c52f148e28a53d5be8a7792c17e34d Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 9 Dec 2025 20:42:20 +0100 Subject: [PATCH 09/12] fix: improve queryables handling and CQL2 filter property normalization Combines the following fixes: - Normalize property names by stripping 'properties.' prefix in CQL2 filter - Enhance field mapping to handle 'properties.' prefix in Elasticsearch queries - Fix nested property traversal and improve error messages - Implement exclusion of fields from queryables based on environment variable - Enhance exclusion logic for queryables to support top-level fields and properties prefix --- .../core/stac_fastapi/core/queryables.py | 31 +- .../sfeos_helpers/database/mapping.py | 93 ++++- .../sfeos_helpers/filter/client.py | 53 ++- .../sfeos_helpers/filter/transform.py | 15 +- .../tests/api/test_api_query_validation.py | 55 +++ stac_fastapi/tests/core/test_queryables.py | 19 + stac_fastapi/tests/database/test_mapping.py | 382 ++++++++++++++++++ 7 files changed, 610 insertions(+), 38 deletions(-) create mode 100644 stac_fastapi/tests/database/test_mapping.py diff --git a/stac_fastapi/core/stac_fastapi/core/queryables.py b/stac_fastapi/core/stac_fastapi/core/queryables.py index 90c28717b..60bd5c779 100644 --- a/stac_fastapi/core/stac_fastapi/core/queryables.py +++ b/stac_fastapi/core/stac_fastapi/core/queryables.py @@ -25,7 +25,6 @@ def __init__(self, database_logic: Any): self._lock = asyncio.Lock() self.validation_enabled: bool = False self.cache_ttl: int = 1800 # How often to refresh cache (in seconds) - self.excluded_queryables: Set[str] = set() self.reload_settings() def reload_settings(self): @@ -35,17 +34,6 @@ def reload_settings(self): ) self.cache_ttl = int(os.getenv("QUERYABLES_CACHE_TTL", "1800")) - excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") - self.excluded_queryables = set() - if excluded: - for field in excluded.split(","): - field = field.strip() - if field: - # Remove 'properties.' 
prefix if present - if field.startswith("properties."): - field = field[11:] - self.excluded_queryables.add(field) - async def _update_cache(self): """Update the cache with the latest queryables from the database.""" if not self.validation_enabled: @@ -58,9 +46,6 @@ async def _update_cache(self): queryables_mapping = await self._db_logic.get_queryables_mapping() all_queryables_set = set(queryables_mapping.keys()) - if self.excluded_queryables: - all_queryables_set = all_queryables_set - self.excluded_queryables - self._all_queryables = all_queryables_set self._cache = {"*": list(all_queryables_set)} @@ -93,12 +78,18 @@ async def validate(self, fields: Set[str]) -> None: if invalid_fields: raise HTTPException( status_code=400, - detail=f"Invalid query fields: {', '.join(invalid_fields)}.", + detail=f"Invalid query fields: {', '.join(sorted(invalid_fields))}. " + "These fields are not defined in the collection's queryables. " + "Use the /queryables endpoint to see available fields.", ) def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: - """Recursively extract property names from a CQL2 filter.""" + """Recursively extract property names from a CQL2 filter. + + Property names are normalized by stripping the 'properties.' prefix + if present, to match queryables stored without the prefix. + """ props: Set[str] = set() if "op" in cql2_filter and "args" in cql2_filter: for arg in cql2_filter["args"]: @@ -106,5 +97,9 @@ def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]: if "op" in arg: props.update(get_properties_from_cql2_filter(arg)) elif "property" in arg: - props.add(arg["property"]) + prop_name = arg["property"] + # Strip 'properties.' 
prefix if present + if prop_name.startswith("properties."): + prop_name = prop_name[11:] + props.add(prop_name) return props diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py index 8f6646512..70f07efa7 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py @@ -3,14 +3,62 @@ This module provides functions for working with Elasticsearch/OpenSearch mappings. """ -from typing import Any, Dict +import os +from collections import deque +from typing import Any, Dict, Set + + +def _get_excluded_from_queryables() -> Set[str]: + """Get fields to exclude from queryables endpoint and filtering. + + Reads from EXCLUDED_FROM_QUERYABLES environment variable. + Supports comma-separated list of field names. + + For each exclusion pattern, both the original and the version with/without + 'properties.' prefix are included. This ensures fields are excluded regardless + of whether they appear at the top level or under 'properties' in the mapping. 
+ + Example: + EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,storage:schemes" + + This will exclude: + - properties.auth:schemes (and children like properties.auth:schemes.s3.type) + - auth:schemes (and children like auth:schemes.s3.type) + - storage:schemes (and children) + - properties.storage:schemes (and children) + + Returns: + Set[str]: Set of field names to exclude from queryables + """ + excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") + if not excluded: + return set() + + result = set() + for field in excluded.split(","): + field = field.strip() + if not field: + continue + + result.add(field) + + if field.startswith("properties."): + result.add(field.removeprefix("properties.")) + else: + result.add(f"properties.{field}") + + return result async def get_queryables_mapping_shared( - mappings: Dict[str, Dict[str, Any]], collection_id: str = "*" + mappings: Dict[str, Dict[str, Any]], + collection_id: str = "*", ) -> Dict[str, str]: """Retrieve mapping of Queryables for search. + Fields listed in the EXCLUDED_FROM_QUERYABLES environment variable will be + excluded from the result, along with their children. + Args: mappings (Dict[str, Dict[str, Any]]): The mapping information returned from Elasticsearch/OpenSearch client's indices.get_mapping() method. @@ -20,19 +68,44 @@ async def get_queryables_mapping_shared( Returns: Dict[str, str]: A dictionary containing the Queryables mappings, where keys are - field names and values are the corresponding paths in the Elasticsearch/OpenSearch - document structure. + field names (with 'properties.' prefix removed) and values are the + corresponding paths in the Elasticsearch/OpenSearch document structure. 
""" queryables_mapping = {} + excluded = _get_excluded_from_queryables() + + def is_excluded(path: str) -> bool: + """Check if the path starts with any excluded prefix.""" + return any( + path == prefix or path.startswith(prefix + ".") for prefix in excluded + ) for mapping in mappings.values(): - fields = mapping["mappings"].get("properties", {}) - properties = fields.pop("properties", {}).get("properties", {}).keys() + mapping_properties = mapping["mappings"].get("properties", {}) + + stack: deque[tuple[str, Dict[str, Any]]] = deque(mapping_properties.items()) + + while stack: + field_fqn, field_def = stack.popleft() + + nested_properties = field_def.get("properties") + if nested_properties: + stack.extend( + (f"{field_fqn}.{k}", v) + for k, v in nested_properties.items() + if v.get("enabled", True) and not is_excluded(f"{field_fqn}.{k}") + ) + + field_type = field_def.get("type") + if ( + not field_type + or not field_def.get("enabled", True) + or is_excluded(field_fqn) + ): + continue - for field_key in fields: - queryables_mapping[field_key] = field_key + field_name = field_fqn.removeprefix("properties.") - for property_key in properties: - queryables_mapping[property_key] = f"properties.{property_key}" + queryables_mapping[field_name] = field_fqn return queryables_mapping diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py index ac19d65d8..7febde1a5 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -26,8 +26,12 @@ def _get_excluded_from_queryables() -> set[str]: Reads from EXCLUDED_FROM_QUERYABLES environment variable. Supports comma-separated list of field names. + For each exclusion pattern, both the original and the version with/without + 'properties.' prefix are included. 
This ensures fields are excluded regardless + of whether they appear at the top level or under 'properties' in the mapping. + Example: - EXCLUDED_FROM_QUERYABLES="auth:schemes,storage:schemes" + EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,storage:schemes" Returns: Set[str]: Set of field names to exclude from queryables @@ -35,7 +39,41 @@ def _get_excluded_from_queryables() -> set[str]: excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") if not excluded: return set() - return {field.strip() for field in excluded.split(",") if field.strip()} + + result = set() + for field in excluded.split(","): + field = field.strip() + if not field: + continue + + result.add(field) + + if field.startswith("properties."): + result.add(field.removeprefix("properties.")) + else: + result.add(f"properties.{field}") + + return result + + @staticmethod + def _is_excluded(field_fqn: str, excluded: set[str]) -> bool: + """Check if a field should be excluded based on prefix matching. + + A field is excluded if: + - It exactly matches an exclusion pattern + - It starts with an exclusion pattern followed by a dot (nested child) + + Args: + field_fqn: Fully qualified field name (e.g., "properties.auth:schemes.s3.type") + excluded: Set of exclusion patterns + + Returns: + True if field should be excluded, False otherwise + """ + for prefix in excluded: + if field_fqn == prefix or field_fqn.startswith(prefix + "."): + return True + return False async def get_queryables( self, @@ -92,23 +130,20 @@ async def get_queryables( while stack: field_fqn, field_def = stack.popleft() - # Iterate over nested fields + if self._is_excluded(field_fqn, excluded_fields): + continue + field_properties = field_def.get("properties") if field_properties: stack.extend( (f"{field_fqn}.{k}", v) for k, v in field_properties.items() if v.get("enabled", True) - and f"{field_fqn}.{k}" not in excluded_fields ) # Skip non-indexed or disabled fields field_type = field_def.get("type") - if ( - not field_type - or not 
field_def.get("enabled", True) - or field_fqn in excluded_fields - ): + if not field_type or not field_def.get("enabled", True): continue # Fields in Item Properties should be exposed with their un-prefixed names, diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py index 6945a359e..26ba40330 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py @@ -22,7 +22,20 @@ def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str: Returns: str: The mapped field name suitable for Elasticsearch queries. """ - return queryables_mapping.get(field, field) + # First, try to find the field as-is in the mapping + if field in queryables_mapping: + return queryables_mapping[field] + + # If field has 'properties.' prefix, try without it + # This handles cases where users specify 'properties.eo:cloud_cover' + # but queryables_mapping uses 'eo:cloud_cover' as the key + if field.startswith("properties."): + normalized_field = field[11:] # len("properties.") == 11 + if normalized_field in queryables_mapping: + return queryables_mapping[normalized_field] + + # If not found, return the original field + return field def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]: diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py index a071a0294..2638f871a 100644 --- a/stac_fastapi/tests/api/test_api_query_validation.py +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -30,6 +30,8 @@ def enable_validation(): client = app_config["client"] with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): client.queryables_cache.reload_settings() + client.queryables_cache._cache = {} + client.queryables_cache._last_updated = 0 yield 
client.queryables_cache.reload_settings() @@ -114,3 +116,56 @@ async def test_validate_queryables_excluded(app_client, ctx): assert resp.status_code == 200 client.queryables_cache.reload_settings() + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_invalid_param(app_client, ctx): + """Test GET /search with an invalid cql2-text filter parameter.""" + params = { + "filter-lang": "cql2-text", + "filter": "properties.invalid_param < 5", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_valid_param(app_client, ctx): + """Test GET /search with a valid cql2-text filter parameter.""" + params = { + "filter-lang": "cql2-text", + "filter": "eo:cloud_cover < 10", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_item_collection_get_cql2_text_invalid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with invalid cql2-text filter.""" + collection_id = ctx.item["collection"] + params = { + "filter-lang": "cql2-text", + "filter": "properties.invalid_param < 5", + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_with_properties_prefix(app_client, ctx): + """Test GET /search with a valid cql2-text filter using properties. prefix. + + This tests the case where users specify 'properties.eo:cloud_cover' instead of + just 'eo:cloud_cover'. Both formats should work correctly. 
+ """ + params = { + "filter-lang": "cql2-text", + "filter": "properties.eo:cloud_cover < 10", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 200 diff --git a/stac_fastapi/tests/core/test_queryables.py b/stac_fastapi/tests/core/test_queryables.py index 10a742049..d65792748 100644 --- a/stac_fastapi/tests/core/test_queryables.py +++ b/stac_fastapi/tests/core/test_queryables.py @@ -116,3 +116,22 @@ def test_get_properties_from_cql2_filter(): # Empty/invalid assert get_properties_from_cql2_filter({}) == set() + + +def test_get_properties_from_cql2_filter_strips_properties_prefix(): + """Test that 'properties.' prefix is stripped from property names.""" + # Single property with prefix + cql2 = {"op": "<", "args": [{"property": "properties.none"}, 5]} + props = get_properties_from_cql2_filter(cql2) + assert props == {"none"} + + # Mixed with and without prefix + cql2_nested = { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "properties.test"}, "v1"]}, + {"op": "<", "args": [{"property": "eo:cloud_cover"}, 10]}, + ], + } + props = get_properties_from_cql2_filter(cql2_nested) + assert props == {"test", "eo:cloud_cover"} diff --git a/stac_fastapi/tests/database/test_mapping.py b/stac_fastapi/tests/database/test_mapping.py new file mode 100644 index 000000000..ebf869c06 --- /dev/null +++ b/stac_fastapi/tests/database/test_mapping.py @@ -0,0 +1,382 @@ +"""Tests for the mapping module.""" + +import pytest + +from stac_fastapi.sfeos_helpers.database.mapping import get_queryables_mapping_shared + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_simple(): + """Test basic mapping extraction.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "properties": { + "properties": { + "datetime": {"type": "date"}, + "eo:cloud_cover": {"type": "float"}, + } + }, + } + } + } + } + + result = await 
get_queryables_mapping_shared(mappings) + + assert "id" in result + assert result["id"] == "id" + assert "collection" in result + assert result["collection"] == "collection" + assert "datetime" in result + assert result["datetime"] == "properties.datetime" + assert "eo:cloud_cover" in result + assert result["eo:cloud_cover"] == "properties.eo:cloud_cover" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_nested_properties(): + """Test that nested properties are properly traversed. + + This tests the case where a property like 'processing:software.eometadatatool' + exists, which is represented as a nested object in Elasticsearch/OpenSearch. + """ + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "processing:software": { + "properties": { + "eometadatatool": {"type": "keyword"}, + "version": {"type": "keyword"}, + } + }, + "eo:cloud_cover": {"type": "float"}, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Check that nested properties are properly traversed + assert "processing:software.eometadatatool" in result + assert ( + result["processing:software.eometadatatool"] + == "properties.processing:software.eometadatatool" + ) + assert "processing:software.version" in result + assert ( + result["processing:software.version"] + == "properties.processing:software.version" + ) + + # Regular properties should still work + assert "eo:cloud_cover" in result + assert result["eo:cloud_cover"] == "properties.eo:cloud_cover" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_deeply_nested(): + """Test deeply nested properties.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + "level1": { + "properties": { + "level2": { + "properties": {"level3": {"type": "keyword"}} + } + } + }, + } + }, + } + } + } + } + + result = await 
get_queryables_mapping_shared(mappings) + + assert "level1.level2.level3" in result + assert result["level1.level2.level3"] == "properties.level1.level2.level3" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_disabled_fields(): + """Test that disabled fields are excluded.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + "enabled_field": {"type": "keyword"}, + "disabled_field": {"type": "keyword", "enabled": False}, + "parent": { + "properties": { + "enabled_nested": {"type": "keyword"}, + "disabled_nested": { + "type": "keyword", + "enabled": False, + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + assert "enabled_field" in result + assert "disabled_field" not in result + assert "parent.enabled_nested" in result + assert "parent.disabled_nested" not in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_container_fields(): + """Test that container fields (without type) are not included but their children are.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + # This is a container field with no type + "container": { + "properties": { + "child1": {"type": "keyword"}, + "child2": {"type": "float"}, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Container field should not be in results (no type) + assert "container" not in result + # But its children should be + assert "container.child1" in result + assert "container.child2" in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_multiple_indices(): + """Test that mappings from multiple indices are merged.""" + mappings = { + "index1": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "field1": {"type": "keyword"}, + } + }, + } + } + }, + "index2": { + "mappings": { + 
"properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "field2": {"type": "float"}, + } + }, + } + } + }, + } + + result = await get_queryables_mapping_shared(mappings) + + assert "field1" in result + assert "field2" in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields(monkeypatch): + """Test that fields in EXCLUDED_FROM_QUERYABLES are excluded.""" + # Set the environment variable to exclude fields + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "properties.auth:schemes,properties.storage:schemes", + ) + + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + "requester_pays": {"type": "boolean"}, + } + }, + "http": { + "properties": { + "type": {"type": "keyword"}, + } + }, + } + }, + "storage:schemes": { + "properties": { + "s3": { + "properties": { + "platform": {"type": "keyword"}, + } + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields and their children should NOT be present + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result + assert "auth:schemes.s3.requester_pays" not in result + assert "auth:schemes.http" not in result + assert "auth:schemes.http.type" not in result + assert "storage:schemes" not in result + assert "storage:schemes.s3" not in result + assert "storage:schemes.s3.platform" not in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields_top_level(monkeypatch): + """Test that exclusions work for fields at top level (no properties. prefix in path). 
+ + Some indices (like EOPF) have auth:schemes at the top level, resulting in + field paths like 'auth:schemes.s3.type' instead of 'properties.auth:schemes.s3.type'. + The exclusion should work for both cases. + """ + # Set the environment variable to exclude fields with properties. prefix + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "properties.auth:schemes,properties.storage:schemes", + ) + + # Mapping where auth:schemes is at the TOP level (not under properties.properties) + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + "requester_pays": {"type": "boolean"}, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields should NOT be present even without properties. prefix in path + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result + assert "auth:schemes.s3.requester_pays" not in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields_no_prefix_config( + monkeypatch, +): + """Test that exclusions work when configured WITHOUT properties. prefix. + + If user sets EXCLUDED_FROM_QUERYABLES='auth:schemes', it should also + exclude 'properties.auth:schemes' and vice versa. + """ + # Set the environment variable WITHOUT properties. 
prefix + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "auth:schemes", + ) + + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + } + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields should NOT be present (properties. prefix auto-added) + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result From 85b7432aabe7ecf55ecb2b4285e1e587ae612b0d Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 9 Dec 2025 20:56:18 +0100 Subject: [PATCH 10/12] fix: remove unused redis_utils import to fix flake8 errors --- stac_fastapi/core/stac_fastapi/core/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index bf87f502b..18bea46f4 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -28,8 +28,6 @@ QueryablesCache, get_properties_from_cql2_filter, ) -from stac_fastapi.core.redis_utils import redis_pagination_links -from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer from stac_fastapi.core.serializers import ( CatalogSerializer, CollectionSerializer, @@ -101,6 +99,7 @@ class CoreClient(AsyncBaseCoreClient): def __attrs_post_init__(self): """Initialize the queryables cache.""" self.queryables_cache = QueryablesCache(self.database) + def extension_is_enabled(self, extension_name: str) -> bool: """Check if an extension is enabled by checking self.extensions. 
From e5211e4293d8e8d4223514fe0be9ce113214e416 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 9 Dec 2025 20:59:48 +0100 Subject: [PATCH 11/12] black format --- stac_fastapi/core/stac_fastapi/core/base_database_logic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index 4af8ae253..9ade962ba 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -143,7 +143,7 @@ async def delete_collection( async def get_queryables_mapping(self, collection_id: str = "*") -> Dict[str, Any]: """Retrieve mapping of Queryables for search.""" pass - + async def get_all_catalogs( self, token: Optional[str], From 9552caf65ad033935749c2a348aa70c77d949ef0 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Wed, 10 Dec 2025 08:56:21 +0100 Subject: [PATCH 12/12] Readded missing changelogs --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7efecbcc0..8b07d8982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `1800` seconds (30 minutes) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) +- Added optional `/catalogs` route support to enable federated hierarchical catalog browsing and navigation. [#547](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/547) + +- Added DELETE `/catalogs/{catalog_id}/collections/{collection_id}` endpoint to support removing collections from catalogs. 
When a collection belongs to multiple catalogs, it removes only the specified catalog from the collection's parent_ids. When a collection belongs to only one catalog, the collection is deleted entirely. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) + +- Added `parent_ids` internal field to collections to support multi-catalog hierarchies. Collections can now belong to multiple catalogs, with parent catalog IDs stored in this field for efficient querying and management. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) + ### Changed ### Fixed