From 51ac649ba4cffc7fb7d38303e102739f79bea5b9 Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Wed, 27 Aug 2025 11:21:58 +0200 Subject: [PATCH 1/3] test: add STAC_ITEM_LIMIT env var tests for results limiting Add tests to verify the STAC_ITEM_LIMIT environment variable correctly limits returned items in all the following endpoints: - Collections list (`/collections`) - Search results (`/search`) - Collection items (`/collections/{id}/items`) Ensures the pagination and result limiting behaves as expected in the API. --- stac_fastapi/tests/api/test_api.py | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index a9de4460..c07efbd2 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1470,3 +1470,73 @@ def create_items(date_prefix: str, start_day: int, count: int) -> dict: f"/collections/{collection_id}/items/{base_item['id']}", json=item_data ) assert response.json()["properties"]["platform"] == "Updated platform via PUT" + + +@pytest.mark.asyncio +async def test_collections_limit_env_variable(app_client, txn_client, load_test_data): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + item = load_test_data("test_collection.json") + + for i in range(10): + test_collection = item.copy() + test_collection["id"] = f"test-collection-env-{i}" + test_collection["title"] = f"Test Collection Env {i}" + await create_collection(txn_client, test_collection) + + resp = await app_client.get("/collections") + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["collections"]) + + +@pytest.mark.asyncio +async def test_search_collection_limit_env_variable( + app_client, txn_client, load_test_data +): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-search-limit" + test_collection["id"] = 
test_collection_id + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + item["collection"] = test_collection_id + + for i in range(10): + test_item = item.copy() + test_item["id"] = f"test-item-search-{i}" + await create_item(txn_client, test_item) + + resp = await app_client.get("/search", params={"collections": [test_collection_id]}) + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["features"]) + + +@pytest.mark.asyncio +async def test_collection_items_limit_env_variable( + app_client, txn_client, load_test_data +): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-items-limit" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + item["collection"] = test_collection_id + + for i in range(10): + test_item = item.copy() + test_item["id"] = f"test-item-collection-{i}" + await create_item(txn_client, test_item) + + resp = await app_client.get(f"/collections/{test_collection_id}/items") + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["features"]) From f7a7f71bf6eb91cbb84b637b8194b689717c8fa4 Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Wed, 27 Aug 2025 11:48:56 +0200 Subject: [PATCH 2/3] Add STAC_ITEM_LIMIT environment variable for result limiting This commit introduces the STAC_ITEM_LIMIT environment variable to control the maximum number of items returned by key API endpoints: - GET /collections - Limits collections returned - GET /collections/{collection_id}/items - Limits items in a collection - GET /search - Limits search results --- stac_fastapi/core/stac_fastapi/core/core.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py 
b/stac_fastapi/core/stac_fastapi/core/core.py index 07b17890..8e7da91b 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -1,6 +1,7 @@ """Core client.""" import logging +import os from datetime import datetime as datetime_type from datetime import timezone from enum import Enum @@ -234,7 +235,7 @@ async def all_collections(self, **kwargs) -> stac_types.Collections: """ request = kwargs["request"] base_url = str(request.base_url) - limit = int(request.query_params.get("limit", 10)) + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) token = request.query_params.get("token") collections, next_token = await self.database.get_all_collections( @@ -285,7 +286,7 @@ async def item_collection( collection_id: str, bbox: Optional[BBox] = None, datetime: Optional[str] = None, - limit: Optional[int] = 10, + limit: Optional[int] = None, token: Optional[str] = None, **kwargs, ) -> stac_types.ItemCollection: @@ -295,7 +296,7 @@ async def item_collection( collection_id (str): The identifier of the collection to read items from. bbox (Optional[BBox]): The bounding box to filter items by. datetime (Optional[str]): The datetime range to filter items by. - limit (int): The maximum number of items to return. The default value is 10. + limit (int): The maximum number of items to return. token (str): A token used for pagination. request (Request): The incoming request. 
@@ -341,6 +342,7 @@ async def item_collection( search = self.database.apply_bbox_filter(search=search, bbox=bbox) + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) items, maybe_count, next_token = await self.database.execute_search( search=search, limit=limit, @@ -393,7 +395,7 @@ async def get_search( ids: Optional[List[str]] = None, bbox: Optional[BBox] = None, datetime: Optional[str] = None, - limit: Optional[int] = 10, + limit: Optional[int] = None, query: Optional[str] = None, token: Optional[str] = None, fields: Optional[List[str]] = None, @@ -426,6 +428,7 @@ async def get_search( Raises: HTTPException: If any error occurs while searching the catalog. """ + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) base_args = { "collections": collections, "ids": ids, From 3f5c97871a37838f314abc25ee596ee51df8efd8 Mon Sep 17 00:00:00 2001 From: Yuri Zmytrakov Date: Thu, 28 Aug 2025 16:36:02 +0200 Subject: [PATCH 3/3] docs: Add user instructions for STAC_ITEM_LIMIT env variable --- CHANGELOG.md | 4 ++++ README.md | 1 + 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52ce5f2f..2b6d4f40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unreleased] +- Added the `STAC_ITEM_LIMIT` environment variable to SFEOS, which sets the default number of items and STAC collections returned when a request does not specify a `limit` [#419](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/419) + +## [v6.2.0] - 2025-08-27 + ### Added - Added comprehensive index management system with dynamic selection and insertion strategies for improved performance and scalability [#405](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/405) diff --git a/README.md b/README.md index 977a351a..297764ce 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,7 @@ You can customize additional settings in your `.env` file: | `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional | | `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional | | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional | +| `STAC_ITEM_LIMIT` | Sets the default number of items and STAC collections that SFEOS returns from `/collections`, `/collections/{collection_id}/items`, and GET `/search` when the request does not include a `limit` query parameter. 
| `10` | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch.