diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b775bb9..f8a2f3e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- CloudFerro logo to sponsors and supporters list [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) +- Latest news section to README [#485](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/485) +- Environment variable `EXCLUDED_FROM_QUERYABLES` to exclude specific fields from queryables endpoint and filtering. Supports comma-separated list of fully qualified field names (e.g., `properties.auth:schemes,properties.storage:schemes`) [#489](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/489) + ### Changed ### Fixed diff --git a/README.md b/README.md index f23300ae..42837431 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Using Pre-built Docker Images](#using-pre-built-docker-images) - [Using Docker Compose](#using-docker-compose) - [Configuration Reference](#configuration-reference) + - [Excluding Fields from Queryables](#excluding-fields-from-queryables) - [Datetime-Based Index Management](#datetime-based-index-management) - [Overview](#overview) - [When to Use](#when-to-use) @@ -337,10 +338,35 @@ You can customize additional settings in your `.env` file: | `STAC_DEFAULT_ITEM_LIMIT` | Configures the default number of STAC items returned when no limit parameter is specified in the request. | `10` | Optional | | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. 
When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | +| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. +## Excluding Fields from Queryables + +You can exclude specific fields from being exposed in the queryables endpoint and from filtering by setting the `EXCLUDED_FROM_QUERYABLES` environment variable. This is useful for hiding sensitive or internal fields that should not be queryable by API users. + +**Environment Variable:** + +```bash +EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,properties.internal:metadata" +``` + +**Format:** + +- Comma-separated list of fully qualified field names +- Use the full path including the `properties.` prefix for item properties +- Example field names: + - `properties.auth:schemes` + - `properties.storage:schemes` + +**Behavior:** + +- Excluded fields will not appear in the queryables response +- Excluded fields and their nested children will be skipped during field traversal +- Both the field itself and any nested properties will be excluded + ## Datetime-Based Index Management ### Overview diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md index d3b09167..2075ff87 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md @@ -9,10 +9,12 @@ between the two implementations. 
The filter package is organized into three main modules: - **cql2.py**: Contains functions for converting CQL2 patterns to Elasticsearch/OpenSearch compatible formats + - [cql2_like_to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:59:0-75:5): Converts CQL2 "LIKE" characters to Elasticsearch "wildcard" characters - - [_replace_like_patterns](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:51:0-56:71): Helper function for pattern replacement + - [\_replace_like_patterns](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:51:0-56:71): Helper function for pattern replacement - **transform.py**: Contains functions for transforming CQL2 queries to Elasticsearch/OpenSearch query DSL + - [to_es_field](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:83:0-93:47): Maps field names using queryables mapping - [to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:96:0-201:13): Transforms CQL2 query structures to Elasticsearch/OpenSearch query DSL @@ -24,4 +26,5 @@ The filter package is organized into three main modules: Import the necessary components from the filter package: ```python -from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es, EsAsyncBaseFiltersClient \ No newline at end of file +from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es, EsAsyncBaseFiltersClient +``` diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py index 98081bfd..ac19d65d 100644 --- 
a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -1,7 +1,8 @@ """Filter client implementation for Elasticsearch/OpenSearch.""" +import os from collections import deque -from typing import Any, Dict, Optional, Tuple +from typing import Any, Optional import attr from fastapi import Request @@ -18,9 +19,29 @@ class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): database: BaseDatabaseLogic = attr.ib() + @staticmethod + def _get_excluded_from_queryables() -> set[str]: + """Get fields to exclude from queryables endpoint and filtering. + + Reads from EXCLUDED_FROM_QUERYABLES environment variable. + Supports comma-separated list of fully qualified field names. + + Example: + EXCLUDED_FROM_QUERYABLES="properties.platform,properties.instrument" + + Returns: + set[str]: Set of fully qualified field names to exclude from queryables + """ + excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "") + if not excluded: + return set() + return {field.strip() for field in excluded.split(",") if field.strip()} + async def get_queryables( - self, collection_id: Optional[str] = None, **kwargs - ) -> Dict[str, Any]: + self, + collection_id: Optional[str] = None, # noqa: UP045 + **kwargs: Any, + ) -> dict[str, Any]: """Get the queryables available for the given collection_id. If collection_id is None, returns the intersection of all @@ -38,21 +59,23 @@ async def get_queryables( Returns: Dict[str, Any]: A dictionary containing the queryables for the given collection. 
""" - request: Optional[Request] = kwargs.get("request") - url_str: str = str(request.url) if request else "" - queryables: Dict[str, Any] = { + request: Optional[Request] = kwargs.get("request") # noqa: UP045 + url_str = str(request.url) if request else "" + + queryables: dict[str, Any] = { "$schema": "https://json-schema.org/draft-07/schema", - "$id": f"{url_str}", + "$id": url_str, "type": "object", "title": "Queryables for STAC API", "description": "Queryable names for the STAC API Item Search filter.", "properties": DEFAULT_QUERYABLES, "additionalProperties": True, } + if not collection_id: return queryables - properties: Dict[str, Any] = queryables["properties"].copy() + properties = queryables["properties"].copy() queryables.update( { "properties": properties, @@ -62,8 +85,9 @@ async def get_queryables( mapping_data = await self.database.get_items_mapping(collection_id) mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"] - stack: deque[Tuple[str, Dict[str, Any]]] = deque(mapping_properties.items()) - enum_fields: Dict[str, Dict[str, Any]] = {} + stack: deque[tuple[str, dict[str, Any]]] = deque(mapping_properties.items()) + enum_fields: dict[str, dict[str, Any]] = {} + excluded_fields = self._get_excluded_from_queryables() while stack: field_fqn, field_def = stack.popleft() @@ -75,11 +99,16 @@ async def get_queryables( (f"{field_fqn}.{k}", v) for k, v in field_properties.items() if v.get("enabled", True) + and f"{field_fqn}.{k}" not in excluded_fields ) # Skip non-indexed or disabled fields field_type = field_def.get("type") - if not field_type or not field_def.get("enabled", True): + if ( + not field_type + or not field_def.get("enabled", True) + or field_fqn in excluded_fields + ): continue # Fields in Item Properties should be exposed with their un-prefixed names, @@ -88,7 +117,7 @@ async def get_queryables( field_name = field_fqn.removeprefix("properties.") # Generate field properties - field_result = 
ALL_QUERYABLES.get(field_name, {}) + field_result = ALL_QUERYABLES.get(field_name, {}).copy() properties[field_name] = field_result field_name_human = field_name.replace("_", " ").title() @@ -104,9 +133,10 @@ async def get_queryables( enum_fields[field_fqn] = field_result if enum_fields: - for field_fqn, unique_values in ( - await self.database.get_items_unique_values(collection_id, enum_fields) - ).items(): - enum_fields[field_fqn]["enum"] = unique_values + unique_values = await self.database.get_items_unique_values( + collection_id, enum_fields + ) + for field_fqn, values in unique_values.items(): + enum_fields[field_fqn]["enum"] = values return queryables diff --git a/stac_fastapi/tests/extensions/test_filter.py b/stac_fastapi/tests/extensions/test_filter.py index e54d198e..d60a13be 100644 --- a/stac_fastapi/tests/extensions/test_filter.py +++ b/stac_fastapi/tests/extensions/test_filter.py @@ -674,3 +674,53 @@ async def test_queryables_enum_platform( # Clean up r = await app_client.delete(f"/collections/{collection_id}") r.raise_for_status() + + +@pytest.mark.asyncio +async def test_queryables_excluded_fields( + app_client: AsyncClient, + load_test_data: Callable[[str], Dict], + monkeypatch: pytest.MonkeyPatch, +): + """Test that fields can be excluded from queryables using EXCLUDED_FROM_QUERYABLES.""" + # Arrange + monkeypatch.setenv("DATABASE_REFRESH", "true") + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", "properties.platform,properties.instrument" + ) + + # Create collection + collection_data = load_test_data("test_collection.json") + collection_id = collection_data["id"] = f"exclude-test-collection-{uuid.uuid4()}" + r = await app_client.post("/collections", json=collection_data) + r.raise_for_status() + + # Create an item + item_data = load_test_data("test_item.json") + item_data["id"] = "exclude-test-item" + item_data["collection"] = collection_id + item_data["properties"]["platform"] = "landsat-8" + item_data["properties"]["instrument"] = "OLI_TIRS" 
+ r = await app_client.post(f"/collections/{collection_id}/items", json=item_data) + r.raise_for_status() + + # Act + queryables = ( + (await app_client.get(f"/collections/{collection_id}/queryables")) + .raise_for_status() + .json() + ) + + # Assert + # Excluded fields should NOT be in queryables + properties = queryables["properties"] + assert "platform" not in properties + assert "instrument" not in properties + + # Other fields should still be present + assert "datetime" in properties + assert "gsd" in properties + + # Clean up + r = await app_client.delete(f"/collections/{collection_id}") + r.raise_for_status()