diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a7bc3a2..c0497770 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +- Added the `STAC_ITEM_LIMIT` environment variable to SFEOS, which sets the default result limit for returned items and STAC collections [#419](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/419) ## [v6.2.0] - 2025-08-27 diff --git a/README.md b/README.md index 41af2591..a864b3d5 100644 --- a/README.md +++ b/README.md @@ -227,6 +227,7 @@ You can customize additional settings in your `.env` file: | `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional | | `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional | | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional | +| `STAC_ITEM_LIMIT` | Sets the default maximum number of items and STAC collections returned by SFEOS when a request does not specify a `limit` query parameter. 
| `10` | Optional | > [!NOTE] > The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 07b17890..8e7da91b 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -1,6 +1,7 @@ """Core client.""" import logging +import os from datetime import datetime as datetime_type from datetime import timezone from enum import Enum @@ -234,7 +235,7 @@ async def all_collections(self, **kwargs) -> stac_types.Collections: """ request = kwargs["request"] base_url = str(request.base_url) - limit = int(request.query_params.get("limit", 10)) + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) token = request.query_params.get("token") collections, next_token = await self.database.get_all_collections( @@ -285,7 +286,7 @@ async def item_collection( collection_id: str, bbox: Optional[BBox] = None, datetime: Optional[str] = None, - limit: Optional[int] = 10, + limit: Optional[int] = None, token: Optional[str] = None, **kwargs, ) -> stac_types.ItemCollection: @@ -295,7 +296,7 @@ async def item_collection( collection_id (str): The identifier of the collection to read items from. bbox (Optional[BBox]): The bounding box to filter items by. datetime (Optional[str]): The datetime range to filter items by. - limit (int): The maximum number of items to return. The default value is 10. + limit (int): The maximum number of items to return. token (str): A token used for pagination. request (Request): The incoming request. 
@@ -341,6 +342,7 @@ async def item_collection( search = self.database.apply_bbox_filter(search=search, bbox=bbox) + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) items, maybe_count, next_token = await self.database.execute_search( search=search, limit=limit, @@ -393,7 +395,7 @@ async def get_search( ids: Optional[List[str]] = None, bbox: Optional[BBox] = None, datetime: Optional[str] = None, - limit: Optional[int] = 10, + limit: Optional[int] = None, query: Optional[str] = None, token: Optional[str] = None, fields: Optional[List[str]] = None, @@ -426,6 +428,7 @@ async def get_search( Raises: HTTPException: If any error occurs while searching the catalog. """ + limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) base_args = { "collections": collections, "ids": ids, diff --git a/stac_fastapi/tests/api/test_api.py b/stac_fastapi/tests/api/test_api.py index a9de4460..c07efbd2 100644 --- a/stac_fastapi/tests/api/test_api.py +++ b/stac_fastapi/tests/api/test_api.py @@ -1470,3 +1470,73 @@ def create_items(date_prefix: str, start_day: int, count: int) -> dict: f"/collections/{collection_id}/items/{base_item['id']}", json=item_data ) assert response.json()["properties"]["platform"] == "Updated platform via PUT" + + +@pytest.mark.asyncio +async def test_collections_limit_env_variable(app_client, txn_client, load_test_data): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + item = load_test_data("test_collection.json") + + for i in range(10): + test_collection = item.copy() + test_collection["id"] = f"test-collection-env-{i}" + test_collection["title"] = f"Test Collection Env {i}" + await create_collection(txn_client, test_collection) + + resp = await app_client.get("/collections") + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["collections"]) + + +@pytest.mark.asyncio +async def test_search_collection_limit_env_variable( + app_client, txn_client, 
load_test_data +): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-search-limit" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + item["collection"] = test_collection_id + + for i in range(10): + test_item = item.copy() + test_item["id"] = f"test-item-search-{i}" + await create_item(txn_client, test_item) + + resp = await app_client.get("/search", params={"collections": [test_collection_id]}) + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["features"]) + + +@pytest.mark.asyncio +async def test_collection_items_limit_env_variable( + app_client, txn_client, load_test_data +): + limit = "5" + os.environ["STAC_ITEM_LIMIT"] = limit + + test_collection = load_test_data("test_collection.json") + test_collection_id = "test-collection-items-limit" + test_collection["id"] = test_collection_id + await create_collection(txn_client, test_collection) + + item = load_test_data("test_item.json") + item["collection"] = test_collection_id + + for i in range(10): + test_item = item.copy() + test_item["id"] = f"test-item-collection-{i}" + await create_item(txn_client, test_item) + + resp = await app_client.get(f"/collections/{test_collection_id}/items") + assert resp.status_code == 200 + resp_json = resp.json() + assert int(limit) == len(resp_json["features"])