From 9baebae459dbe71521d21034f51f46f02cc4f154 Mon Sep 17 00:00:00 2001 From: Marcin Niemyjski Date: Thu, 9 Oct 2025 16:54:57 +0200 Subject: [PATCH 1/5] working demo updated by precommit --- CHANGELOG.md | 1 + README.md | 1 + .../sfeos_helpers/filter/README.md | 33 +++++++++- .../sfeos_helpers/filter/client.py | 60 ++++++++++++++----- 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0f028bc..1101bbea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - CloudFerro logo to sponsors and supporters list [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) - Latest news section to README [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) +- Environment variable `EXCLUDED_FROM_QUERYABLES` to exclude specific fields from queryables endpoint and filtering. Supports comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`) ### Changed diff --git a/README.md b/README.md index b87bd21b..534f7662 100644 --- a/README.md +++ b/README.md @@ -313,6 +313,7 @@ You can customize additional settings in your `.env` file: | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `ENV_MAX_LIMIT` | Configures the environment variable in SFEOS to override the default `MAX_LIMIT`, which controls the limit parameter for returned items and STAC collections. | `10,000` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. 
| `true` | Optional | +| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md index d3b09167..98b0a150 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md @@ -9,19 +9,48 @@ between the two implementations. The filter package is organized into three main modules: - **cql2.py**: Contains functions for converting CQL2 patterns to Elasticsearch/OpenSearch compatible formats + - [cql2_like_to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:59:0-75:5): Converts CQL2 "LIKE" characters to Elasticsearch "wildcard" characters - - [_replace_like_patterns](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:51:0-56:71): Helper function for pattern replacement + - [\_replace_like_patterns](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:51:0-56:71): Helper function for pattern replacement - **transform.py**: Contains functions for transforming CQL2 queries to Elasticsearch/OpenSearch query DSL + - 
[to_es_field](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:83:0-93:47): Maps field names using queryables mapping - [to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:96:0-201:13): Transforms CQL2 query structures to Elasticsearch/OpenSearch query DSL - **client.py**: Contains the base filter client implementation - [EsAsyncBaseFiltersClient](cci:2://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:209:0-293:25): Base class for implementing the STAC filter extension +## Configuration + +### Excluding Fields from Queryables + +You can exclude specific fields from being exposed in the queryables endpoint and from filtering by setting the `EXCLUDED_FROM_QUERYABLES` environment variable. This is useful for hiding sensitive or internal fields that should not be queryable by API users. 
+ +**Environment Variable:** + +```bash +EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,properties.internal:metadata" +``` + +**Format:** + +- Comma-separated list of fully qualified field names +- Use the full path including the `properties.` prefix for item properties +- Example field names: + - `properties.auth:schemes` + - `properties.storage:schemes` + +**Behavior:** + +- Excluded fields will not appear in the queryables response +- Excluded fields and their nested children will be skipped during field traversal +- Both the field itself and any nested properties will be excluded + ## Usage Import the necessary components from the filter package: ```python -from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es, EsAsyncBaseFiltersClient \ No newline at end of file +from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es, EsAsyncBaseFiltersClient +``` diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py index 98081bfd..4f122ca2 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -1,7 +1,8 @@ """Filter client implementation for Elasticsearch/OpenSearch.""" +import os from collections import deque -from typing import Any, Dict, Optional, Tuple +from typing import Any import attr from fastapi import Request @@ -18,9 +19,27 @@ class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): database: BaseDatabaseLogic = attr.ib() + @staticmethod + def _get_excluded_from_queryables() -> set[str]: + """Get fields to exclude from queryables endpoint and filtering. + + Reads from EXCLUDED_FROM_QUERYABLES environment variable. + Supports comma-separated list of field names. 
+ + Example: + EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes" + + Returns: + Set[str]: Set of field names to exclude from queryables + """ + excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") + if not excluded: + return set() + return {field.strip() for field in excluded.split(",") if field.strip()} + async def get_queryables( - self, collection_id: Optional[str] = None, **kwargs - ) -> Dict[str, Any]: + self, collection_id: str | None = None, **kwargs + ) -> dict[str, Any]: """Get the queryables available for the given collection_id. If collection_id is None, returns the intersection of all @@ -38,21 +57,23 @@ async def get_queryables( Returns: Dict[str, Any]: A dictionary containing the queryables for the given collection. """ - request: Optional[Request] = kwargs.get("request") - url_str: str = str(request.url) if request else "" - queryables: Dict[str, Any] = { + request: Request | None = kwargs.get("request") + url_str = str(request.url) if request else "" + + queryables: dict[str, Any] = { "$schema": "https://json-schema.org/draft-07/schema", - "$id": f"{url_str}", + "$id": url_str, "type": "object", "title": "Queryables for STAC API", "description": "Queryable names for the STAC API Item Search filter.", "properties": DEFAULT_QUERYABLES, "additionalProperties": True, } + if not collection_id: return queryables - properties: Dict[str, Any] = queryables["properties"].copy() + properties = queryables["properties"].copy() queryables.update( { "properties": properties, @@ -62,8 +83,9 @@ async def get_queryables( mapping_data = await self.database.get_items_mapping(collection_id) mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"] - stack: deque[Tuple[str, Dict[str, Any]]] = deque(mapping_properties.items()) - enum_fields: Dict[str, Dict[str, Any]] = {} + stack: deque[tuple[str, dict[str, Any]]] = deque(mapping_properties.items()) + enum_fields: dict[str, dict[str, Any]] = {} + excluded_fields = 
self._get_excluded_from_queryables() while stack: field_fqn, field_def = stack.popleft() @@ -75,11 +97,16 @@ async def get_queryables( (f"{field_fqn}.{k}", v) for k, v in field_properties.items() if v.get("enabled", True) + and f"{field_fqn}.{k}" not in excluded_fields ) # Skip non-indexed or disabled fields field_type = field_def.get("type") - if not field_type or not field_def.get("enabled", True): + if ( + not field_type + or not field_def.get("enabled", True) + or field_fqn in excluded_fields + ): continue # Fields in Item Properties should be exposed with their un-prefixed names, @@ -88,7 +115,7 @@ async def get_queryables( field_name = field_fqn.removeprefix("properties.") # Generate field properties - field_result = ALL_QUERYABLES.get(field_name, {}) + field_result = ALL_QUERYABLES.get(field_name, {}).copy() properties[field_name] = field_result field_name_human = field_name.replace("_", " ").title() @@ -104,9 +131,10 @@ async def get_queryables( enum_fields[field_fqn] = field_result if enum_fields: - for field_fqn, unique_values in ( - await self.database.get_items_unique_values(collection_id, enum_fields) - ).items(): - enum_fields[field_fqn]["enum"] = unique_values + unique_values = await self.database.get_items_unique_values( + collection_id, enum_fields + ) + for field_fqn, values in unique_values.items(): + enum_fields[field_fqn]["enum"] = values return queryables From a2a49e4dfe4837f9f8220513d5368bf834c4fb68 Mon Sep 17 00:00:00 2001 From: Marcin Niemyjski Date: Wed, 22 Oct 2025 14:12:20 +0200 Subject: [PATCH 2/5] supprot for python version lesser than 3.10 --- .../stac_fastapi/sfeos_helpers/filter/client.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py index 4f122ca2..0791cf81 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py +++ 
b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -2,7 +2,7 @@ import os from collections import deque -from typing import Any +from typing import Any, Optional import attr from fastapi import Request @@ -38,7 +38,9 @@ def _get_excluded_from_queryables() -> set[str]: return {field.strip() for field in excluded.split(",") if field.strip()} async def get_queryables( - self, collection_id: str | None = None, **kwargs + self, + collection_id: str | None = None, + **kwargs: Any, ) -> dict[str, Any]: """Get the queryables available for the given collection_id. @@ -57,7 +59,7 @@ async def get_queryables( Returns: Dict[str, Any]: A dictionary containing the queryables for the given collection. """ - request: Request | None = kwargs.get("request") + request: Optional[Request] = kwargs.get("request") # noqa: UP045 url_str = str(request.url) if request else "" queryables: dict[str, Any] = { From 9e8f8a90402b79cce92db9bc06c932da27b7fa3f Mon Sep 17 00:00:00 2001 From: Marcin Niemyjski Date: Wed, 22 Oct 2025 14:14:32 +0200 Subject: [PATCH 3/5] fix --- .../sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py index 0791cf81..ac19d65d 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -39,7 +39,7 @@ def _get_excluded_from_queryables() -> set[str]: async def get_queryables( self, - collection_id: str | None = None, + collection_id: Optional[str] = None, # noqa: UP045 **kwargs: Any, ) -> dict[str, Any]: """Get the queryables available for the given collection_id. 
From d26e5732f0abb38c26eb52d2fa1f4f1718d7f4a4 Mon Sep 17 00:00:00 2001 From: Marcin Niemyjski Date: Wed, 22 Oct 2025 14:33:39 +0200 Subject: [PATCH 4/5] updated readme --- CHANGELOG.md | 2 ++ README.md | 25 ++++++++++++++++++ .../sfeos_helpers/filter/README.md | 26 ------------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80b34ad8..f8a2f3e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- CloudFerro logo to sponsors and supporters list [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) +- Latest news section to README [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) - Environment variable `EXCLUDED_FROM_QUERYABLES` to exclude specific fields from queryables endpoint and filtering. Supports comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`) [#489](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/489) ### Changed diff --git a/README.md b/README.md index 2d442c6e..42837431 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Using Pre-built Docker Images](#using-pre-built-docker-images) - [Using Docker Compose](#using-docker-compose) - [Configuration Reference](#configuration-reference) + - [Excluding Fields from Queryables](#excluding-fields-from-queryables) - [Datetime-Based Index Management](#datetime-based-index-management) - [Overview](#overview) - [When to Use](#when-to-use) @@ -342,6 +343,30 @@ You can customize additional settings in your `.env` file: > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even 
if you're using OpenSearch. +## Excluding Fields from Queryables + +You can exclude specific fields from being exposed in the queryables endpoint and from filtering by setting the `EXCLUDED_FROM_QUERYABLES` environment variable. This is useful for hiding sensitive or internal fields that should not be queryable by API users. + +**Environment Variable:** + +```bash +EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,properties.internal:metadata" +``` + +**Format:** + +- Comma-separated list of fully qualified field names +- Use the full path including the `properties.` prefix for item properties +- Example field names: + - `properties.auth:schemes` + - `properties.storage:schemes` + +**Behavior:** + +- Excluded fields will not appear in the queryables response +- Excluded fields and their nested children will be skipped during field traversal +- Both the field itself and any nested properties will be excluded + ## Datetime-Based Index Management ### Overview diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md index 98b0a150..2075ff87 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md @@ -21,32 +21,6 @@ The filter package is organized into three main modules: - **client.py**: Contains the base filter client implementation - [EsAsyncBaseFiltersClient](cci:2://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:209:0-293:25): Base class for implementing the STAC filter extension -## Configuration - -### Excluding Fields from Queryables - -You can exclude specific fields from being exposed in the queryables endpoint and from filtering by setting the `EXCLUDED_FROM_QUERYABLES` environment variable. 
This is useful for hiding sensitive or internal fields that should not be queryable by API users. - -**Environment Variable:** - -```bash -EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,properties.internal:metadata" -``` - -**Format:** - -- Comma-separated list of fully qualified field names -- Use the full path including the `properties.` prefix for item properties -- Example field names: - - `properties.auth:schemes` - - `properties.storage:schemes` - -**Behavior:** - -- Excluded fields will not appear in the queryables response -- Excluded fields and their nested children will be skipped during field traversal -- Both the field itself and any nested properties will be excluded - ## Usage Import the necessary components from the filter package: From edad9f18161575352941f11bf79504a9b05e6f5a Mon Sep 17 00:00:00 2001 From: Marcin Niemyjski Date: Wed, 22 Oct 2025 15:00:18 +0200 Subject: [PATCH 5/5] added test for excluded attribute --- stac_fastapi/tests/extensions/test_filter.py | 50 ++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/stac_fastapi/tests/extensions/test_filter.py b/stac_fastapi/tests/extensions/test_filter.py index e54d198e..d60a13be 100644 --- a/stac_fastapi/tests/extensions/test_filter.py +++ b/stac_fastapi/tests/extensions/test_filter.py @@ -674,3 +674,53 @@ async def test_queryables_enum_platform( # Clean up r = await app_client.delete(f"/collections/{collection_id}") r.raise_for_status() + + +@pytest.mark.asyncio +async def test_queryables_excluded_fields( + app_client: AsyncClient, + load_test_data: Callable[[str], Dict], + monkeypatch: pytest.MonkeyPatch, +): + """Test that fields can be excluded from queryables using EXCLUDED_FROM_QUERYABLES.""" + # Arrange + monkeypatch.setenv("DATABASE_REFRESH", "true") + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", "properties.platform,properties.instrument" + ) + + # Create collection + collection_data = load_test_data("test_collection.json") + 
collection_id = collection_data["id"] = f"exclude-test-collection-{uuid.uuid4()}" + r = await app_client.post("/collections", json=collection_data) + r.raise_for_status() + + # Create an item + item_data = load_test_data("test_item.json") + item_data["id"] = "exclude-test-item" + item_data["collection"] = collection_id + item_data["properties"]["platform"] = "landsat-8" + item_data["properties"]["instrument"] = "OLI_TIRS" + r = await app_client.post(f"/collections/{collection_id}/items", json=item_data) + r.raise_for_status() + + # Act + queryables = ( + (await app_client.get(f"/collections/{collection_id}/queryables")) + .raise_for_status() + .json() + ) + + # Assert + # Excluded fields should NOT be in queryables + properties = queryables["properties"] + assert "platform" not in properties + assert "instrument" not in properties + + # Other fields should still be present + assert "datetime" in properties + assert "gsd" in properties + + # Clean up + r = await app_client.delete(f"/collections/{collection_id}") + r.raise_for_status()