
Commit d167a29

bbox scratch
1 parent f30b4e4 commit d167a29

File tree

11 files changed (+643, -6 lines)


CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -9,10 +9,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

 ### Added

+- Spatial search support for collections via `bbox` parameter on `/collections` endpoint. Collections are now indexed with a `bbox_shape` field (GeoJSON polygon) derived from their spatial extent for efficient geospatial queries.
+- Migration scripts (`update_collections_mapping.py` and `recreate_collections_index.py`) to help add `bbox_shape` field to existing deployments.
+
 ### Changed

 ### Fixed

+- Issue where token was not being passed from request to POST collections search logic
+- Issue where datetime param was not being passed from POST collections search logic to Elasticsearch
+
 [v6.5.0] - 2025-09-29

 ### Added
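
For reference (not part of this commit), a minimal sketch of how the new `bbox` filter might be exercised against the `/collections` endpoint, assuming the Elasticsearch service from compose.yml is running locally on port 8080:

import requests  # any HTTP client works; shown with requests for brevity

# GET /collections, keeping only collections whose spatial extent intersects the bbox.
# For GET requests the bbox is passed as a comma-separated string (see the parsing in
# the Elasticsearch get_all_collections further down in this commit).
resp = requests.get(
    "http://localhost:8080/collections",
    params={"bbox": "-10.0,-10.0,10.0,10.0", "limit": 10},
)
resp.raise_for_status()
for coll in resp.json().get("collections", []):
    print(coll["id"], coll["extent"]["spatial"]["bbox"])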

compose.yml

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,7 @@ services:
       - ES_VERIFY_CERTS=false
       - BACKEND=elasticsearch
       - DATABASE_REFRESH=true
+      - ENABLE_COLLECTIONS_SEARCH_ROUTE=true
     ports:
       - "8080:8080"
     volumes:
@@ -56,6 +57,7 @@ services:
       - ES_VERIFY_CERTS=false
       - BACKEND=opensearch
       - STAC_FASTAPI_RATE_LIMIT=200/minute
+      - ENABLE_COLLECTIONS_SEARCH_ROUTE=true
     ports:
       - "8082:8082"
     volumes:

recreate_collections_index.py

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""
Script to delete and recreate the collections index.

WARNING: This will DELETE all existing collections!
Only use this in development environments.

Usage:
    python recreate_collections_index.py
"""

import asyncio
import os
import sys

from stac_fastapi.sfeos_helpers.mappings import COLLECTIONS_INDEX

# Determine which backend to use
BACKEND = os.getenv("BACKEND", "elasticsearch").lower()

if BACKEND == "opensearch":
    from stac_fastapi.opensearch.config import (
        AsyncOpensearchSettings as AsyncSearchSettings,
    )
    from stac_fastapi.opensearch.database_logic import (
        create_collection_index,
        create_index_templates,
    )
else:
    from stac_fastapi.elasticsearch.config import (
        AsyncElasticsearchSettings as AsyncSearchSettings,
    )
    from stac_fastapi.elasticsearch.database_logic import (
        create_collection_index,
        create_index_templates,
    )


async def recreate_index():
    """Delete and recreate the collections index."""
    settings = AsyncSearchSettings()
    client = settings.create_client

    print(f"Using backend: {BACKEND}")
    print(f"\n{'=' * 60}")
    print("WARNING: This will DELETE all existing collections!")
    print(f"{'=' * 60}\n")

    # Check if running in production
    env = os.getenv("ENVIRONMENT", "development").lower()
    if env == "production":
        print("ERROR: This script should not be run in production!")
        print("Use update_collections_mapping.py instead.")
        sys.exit(1)

    response = input("Are you sure you want to continue? (yes/no): ")
    if response.lower() != "yes":
        print("Aborted.")
        sys.exit(0)

    try:
        # Delete the collections index
        index_name = f"{COLLECTIONS_INDEX}-000001"
        alias_name = COLLECTIONS_INDEX

        print(f"\nDeleting index {index_name}...")
        exists = await client.indices.exists(index=index_name)
        if exists:
            await client.indices.delete(index=index_name)
            print(f"✓ Deleted index {index_name}")
        else:
            print(f"⊘ Index {index_name} does not exist")

        # Check if alias exists and delete it
        alias_exists = await client.indices.exists_alias(name=alias_name)
        if alias_exists:
            print(f"Deleting alias {alias_name}...")
            await client.indices.delete_alias(
                index="_all", name=alias_name, ignore=[404]
            )
            print(f"✓ Deleted alias {alias_name}")

        # Recreate index templates
        print("\nRecreating index templates...")
        await create_index_templates()
        print("✓ Index templates created")

        # Recreate the collections index
        print("\nRecreating collections index...")
        await create_collection_index()
        print("✓ Collections index created")

        # Verify the mapping includes bbox_shape
        print("\nVerifying mapping...")
        mapping = await client.indices.get_mapping(index=index_name)
        properties = mapping[index_name]["mappings"]["properties"]

        if "bbox_shape" in properties:
            print(
                f"✓ bbox_shape field is present in mapping: {properties['bbox_shape']}"
            )
        else:
            print("✗ WARNING: bbox_shape field is NOT in the mapping!")

        print("\n" + "=" * 60)
        print("Collections index successfully recreated!")
        print("You can now create collections with bbox_shape support.")
        print("=" * 60)

    except Exception as e:
        print(f"\n✗ Error: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(recreate_index())

stac_fastapi/core/stac_fastapi/core/base_database_logic.py

Lines changed: 4 additions & 0 deletions
@@ -3,6 +3,8 @@
 import abc
 from typing import Any, Dict, Iterable, List, Optional, Tuple

+from stac_pydantic.shared import BBox
+

 class BaseDatabaseLogic(abc.ABC):
     """
@@ -19,6 +21,7 @@ async def get_all_collections(
         limit: int,
         request: Any = None,
         sort: Optional[List[Dict[str, Any]]] = None,
+        bbox: Optional[BBox] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
         """Retrieve a list of collections from the database, supporting pagination.

@@ -27,6 +30,7 @@ async def get_all_collections(
             limit (int): The number of results to return.
             request (Any, optional): The FastAPI request object. Defaults to None.
             sort (Optional[List[Dict[str, Any]]], optional): Optional sort parameter. Defaults to None.
+            bbox (Optional[BBox], optional): Bounding box to filter collections by spatial extent. Defaults to None.

         Returns:
             A tuple of (collections, next pagination token if any).

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 9 additions & 1 deletion
@@ -300,7 +300,9 @@ async def all_collections(
         else:
             limit = 10

-        token = request.query_params.get("token")
+        # Get token from query params only if not already provided (for GET requests)
+        if token is None:
+            token = request.query_params.get("token")

         # Process fields parameter for filtering collection properties
         includes, excludes = set(), set()
@@ -400,6 +402,7 @@ async def all_collections(
             limit=limit,
             request=request,
             sort=sort,
+            bbox=bbox,
             q=q_list,
             filter=parsed_filter,
             query=parsed_query,
@@ -501,6 +504,11 @@ async def post_all_collections(
         # Pass all parameters from search_request to all_collections
         return await self.all_collections(
             limit=search_request.limit if hasattr(search_request, "limit") else None,
+            bbox=search_request.bbox if hasattr(search_request, "bbox") else None,
+            datetime=search_request.datetime
+            if hasattr(search_request, "datetime")
+            else None,
+            token=search_request.token if hasattr(search_request, "token") else None,
             fields=fields,
             sortby=sortby,
             filter_expr=search_request.filter
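
As a hedged illustration of what these additions enable (route path and port are assumptions here, adjust to your deployment): a POST collections search body can now carry `bbox`, `datetime`, and a pagination `token`, and all three reach the backend instead of being dropped.

import requests  # illustrative client only

body = {
    "bbox": [-10.0, -10.0, 10.0, 10.0],
    "datetime": "2020-01-01T00:00:00Z/2021-01-01T00:00:00Z",
    "limit": 5,
    # "token": "<next page token from a previous response>",  # now forwarded to all_collections
}
# Assumed path for the collections search extension enabled via ENABLE_COLLECTIONS_SEARCH_ROUTE
resp = requests.post("http://localhost:8080/collections-search", json=body)
resp.raise_for_status()
print(len(resp.json().get("collections", [])), "collections returned")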

stac_fastapi/core/stac_fastapi/core/extensions/collections_search.py

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ class CollectionsSearchRequest(ExtendedSearch):
     """Extended search model for collections with free text search support."""

     q: Optional[Union[str, List[str]]] = None
+    token: Optional[str] = None


 class CollectionsSearchEndpointExtension(ApiExtension):

stac_fastapi/core/stac_fastapi/core/serializers.py

Lines changed: 54 additions & 1 deletion
@@ -1,6 +1,7 @@
 """Serializers."""

 import abc
+import logging
 from copy import deepcopy
 from typing import Any, List, Optional

@@ -9,10 +10,12 @@

 from stac_fastapi.core.datetime_utils import now_to_rfc3339_str
 from stac_fastapi.core.models.links import CollectionLinks
-from stac_fastapi.core.utilities import get_bool_env
+from stac_fastapi.core.utilities import bbox2polygon, get_bool_env
 from stac_fastapi.types import stac as stac_types
 from stac_fastapi.types.links import ItemLinks, resolve_links

+logger = logging.getLogger(__name__)
+

 @attr.s
 class Serializer(abc.ABC):
@@ -141,6 +144,53 @@ def stac_to_db(
             collection.get("links", []), str(request.base_url)
         )

+        # Convert bbox to bbox_shape for geospatial queries
+        if "extent" in collection and "spatial" in collection["extent"]:
+            spatial_extent = collection["extent"]["spatial"]
+            if "bbox" in spatial_extent and spatial_extent["bbox"]:
+                # Get the first bbox (collections can have multiple bboxes, but we use the first one)
+                bbox = (
+                    spatial_extent["bbox"][0]
+                    if isinstance(spatial_extent["bbox"][0], list)
+                    else spatial_extent["bbox"]
+                )
+                collection_id = collection.get("id", "unknown")
+                logger.debug(
+                    f"Converting bbox to bbox_shape for collection '{collection_id}': bbox={bbox}"
+                )
+
+                if len(bbox) >= 4:
+                    # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz])
+                    # For a 2D polygon, we only need the x,y coordinates and discard altitude (z) values
+                    minx, miny = bbox[0], bbox[1]
+                    if len(bbox) == 4:
+                        # 2D bbox: [minx, miny, maxx, maxy]
+                        maxx, maxy = bbox[2], bbox[3]
+                        logger.debug(
+                            f"Collection '{collection_id}': Processing 2D bbox"
+                        )
+                    else:
+                        # 3D bbox: [minx, miny, minz, maxx, maxy, maxz]
+                        # Take indices 3,4 for maxx,maxy - discarding altitude at indices 2 (minz) and 5 (maxz)
+                        maxx, maxy = bbox[3], bbox[4]
+                        logger.debug(
+                            f"Collection '{collection_id}': Processing 3D bbox, discarding altitude values at indices 2 and 5"
+                        )
+
+                    # Convert bbox to GeoJSON polygon
+                    bbox_polygon_coords = bbox2polygon(minx, miny, maxx, maxy)
+                    collection["bbox_shape"] = {
+                        "type": "Polygon",
+                        "coordinates": bbox_polygon_coords,
+                    }
+                    logger.info(
+                        f"Collection '{collection_id}': Created bbox_shape from bbox [{minx}, {miny}, {maxx}, {maxy}]"
+                    )
+                else:
+                    logger.warning(
+                        f"Collection '{collection_id}': bbox has insufficient coordinates (length={len(bbox)}), expected at least 4"
+                    )
+
         if get_bool_env("STAC_INDEX_ASSETS"):
             collection["assets"] = [
                 {"es_key": k, **v} for k, v in collection.get("assets", {}).items()
@@ -168,6 +218,9 @@ def db_to_stac(
         # Avoid modifying the input dict in-place ... doing so breaks some tests
         collection = deepcopy(collection)

+        # Remove internal bbox_shape field (not part of STAC spec)
+        collection.pop("bbox_shape", None)
+
         # Set defaults
         collection_id = collection.get("id")
         collection.setdefault("type", "Collection")
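
For orientation, a sketch of the GeoJSON structure the conversion above stores in `bbox_shape`. The helper below is a stand-in written for this note; the ring layout of the real `bbox2polygon` utility is assumed, not quoted.

from typing import List


def bbox2polygon_sketch(minx: float, miny: float, maxx: float, maxy: float) -> List[List[List[float]]]:
    """Return a single closed exterior ring covering the bbox (outer list = rings)."""
    return [[
        [minx, miny],
        [maxx, miny],
        [maxx, maxy],
        [minx, maxy],
        [minx, miny],  # GeoJSON rings are closed by repeating the first vertex
    ]]


# A 3D extent bbox [minx, miny, minz, maxx, maxy, maxz] such as [-10, -10, 0, 10, 10, 1000]
# reduces to the same 2D shape as [-10, -10, 10, 10] once altitude is discarded:
bbox_shape = {"type": "Polygon", "coordinates": bbox2polygon_sketch(-10.0, -10.0, 10.0, 10.0)}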

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 40 additions & 2 deletions
@@ -175,6 +175,7 @@ async def get_all_collections(
         limit: int,
         request: Request,
         sort: Optional[List[Dict[str, Any]]] = None,
+        bbox: Optional[List[float]] = None,
         q: Optional[List[str]] = None,
         filter: Optional[Dict[str, Any]] = None,
         query: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -187,6 +188,7 @@ async def get_all_collections(
             limit (int): The number of results to return.
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
+            bbox (Optional[List[float]]): Bounding box to filter collections by spatial extent.
             q (Optional[List[str]]): Free text search terms.
             query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters.
             filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
@@ -309,12 +311,49 @@ async def get_all_collections(
                 query_parts.append(search_dict["query"])

             except Exception as e:
-                logger = logging.getLogger(__name__)
                 logger.error(f"Error converting query to Elasticsearch: {e}")
                 # If there's an error, add a query that matches nothing
                 query_parts.append({"bool": {"must_not": {"match_all": {}}}})
                 raise

+        # Apply bbox filter if provided
+        if bbox:
+            # Parse bbox if it's a string (from GET requests)
+            if isinstance(bbox, str):
+                try:
+                    bbox = [float(x.strip()) for x in bbox.split(",")]
+                except (ValueError, AttributeError) as e:
+                    logger.error(f"Invalid bbox format: {bbox}, error: {e}")
+                    bbox = None
+
+            if bbox and len(bbox) >= 4:
+                # Extract 2D coordinates (bbox can be 2D [minx, miny, maxx, maxy] or 3D [minx, miny, minz, maxx, maxy, maxz])
+                # For geospatial queries, we discard altitude (z) values
+                minx, miny = bbox[0], bbox[1]
+                if len(bbox) == 4:
+                    # 2D bbox
+                    maxx, maxy = bbox[2], bbox[3]
+                else:
+                    # 3D bbox - take indices 3,4 for maxx,maxy, discarding altitude at indices 2 (minz) and 5 (maxz)
+                    maxx, maxy = bbox[3], bbox[4]
+
+                # Convert bbox to a polygon for the geo_shape query
+                bbox_polygon = {
+                    "type": "Polygon",
+                    "coordinates": bbox2polygon(minx, miny, maxx, maxy),
+                }
+                # Add geo_shape query to filter collections by the bbox_shape field
+                query_parts.append(
+                    {
+                        "geo_shape": {
+                            "bbox_shape": {
+                                "shape": bbox_polygon,
+                                "relation": "intersects",
+                            }
+                        }
+                    }
+                )
+
         # Combine all query parts with AND logic if there are multiple
         datetime_filter = None
         if datetime:
@@ -381,7 +420,6 @@ async def get_all_collections(
         try:
             matched = count_task.result().get("count")
         except Exception as e:
-            logger = logging.getLogger(__name__)
             logger.error(f"Count task failed: {e}")

         return collections, next_token, matched
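
To make the effect of the bbox branch concrete, this is the filter clause it appends to `query_parts` for `bbox=[-10, -10, 10, 10]` (the vertex order assumes `bbox2polygon` returns one closed ring and may differ in detail):

# Illustrative only; mirrors the geo_shape clause built above for bbox = [-10, -10, 10, 10]
geo_shape_clause = {
    "geo_shape": {
        "bbox_shape": {
            "shape": {
                "type": "Polygon",
                "coordinates": [[
                    [-10.0, -10.0],
                    [10.0, -10.0],
                    [10.0, 10.0],
                    [-10.0, 10.0],
                    [-10.0, -10.0],
                ]],
            },
            "relation": "intersects",
        }
    }
}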
