Skip to content

Commit 0988448

Browse files
authored
GET /collections search free text extension (#470)
**Related Issue(s):** - #460 **Description:** ex. `/collections?q=Sentinel-2a` **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog
1 parent 8dd200e commit 0988448

File tree

8 files changed

+163
-8
lines changed

8 files changed

+163
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- GET `/collections` collection search free text extension ex. `/collections?q=sentinel`. [#470](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/470)
1213
- Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452)
1314
- GET `/collections` collection search sort extension ex. `/collections?sortby=+id`. [#456](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/456)
1415
- GET `/collections` collection search fields extension ex. `/collections?fields=id,title`. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465)

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing
126126
- Example: `/collections?fields=id,title,description`
127127
- This helps reduce payload size when only certain fields are needed
128128

129+
- **Free Text Search**: Search across collection text fields using the `q` parameter
130+
- Example: `/collections?q=landsat`
131+
- Searches across multiple text fields including title, description, and keywords
132+
- Supports partial word matching and relevance-based sorting
133+
129134
These extensions make it easier to build user interfaces that display and navigate through collections efficiently.
130135

131136
> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,18 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage:
225225
return landing_page
226226

227227
async def all_collections(
228-
self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, **kwargs
228+
self,
229+
fields: Optional[List[str]] = None,
230+
sortby: Optional[str] = None,
231+
q: Optional[Union[str, List[str]]] = None,
232+
**kwargs,
229233
) -> stac_types.Collections:
230234
"""Read all collections from the database.
231235
232236
Args:
233237
fields (Optional[List[str]]): Fields to include or exclude from the results.
234238
sortby (Optional[str]): Sorting options for the results.
239+
q (Optional[Union[str, List[str]]]): Free text search term, or list of terms.
235240
**kwargs: Keyword arguments from the request.
236241
237242
Returns:
@@ -266,8 +271,13 @@ async def all_collections(
266271
if parsed_sort:
267272
sort = parsed_sort
268273

274+
# Convert q to a list if it's a string
275+
q_list = None
276+
if q is not None:
277+
q_list = [q] if isinstance(q, str) else q
278+
269279
collections, next_token = await self.database.get_all_collections(
270-
token=token, limit=limit, request=request, sort=sort
280+
token=token, limit=limit, request=request, sort=sort, q=q_list
271281
)
272282

273283
# Apply field filtering if fields parameter was provided

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
4545
)
4646
from stac_fastapi.extensions.core.fields import FieldsConformanceClasses
4747
from stac_fastapi.extensions.core.filter import FilterConformanceClasses
48-
49-
# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
48+
from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
5049
from stac_fastapi.extensions.core.query import QueryConformanceClasses
5150
from stac_fastapi.extensions.core.sort import SortConformanceClasses
5251
from stac_fastapi.extensions.third_party import BulkTransactionExtension
@@ -127,7 +126,7 @@
127126
# CollectionSearchFilterExtension(
128127
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
129128
# ),
130-
# FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
129+
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
131130
]
132131

133132
# Initialize collection search with its extensions

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ async def get_all_collections(
175175
limit: int,
176176
request: Request,
177177
sort: Optional[List[Dict[str, Any]]] = None,
178+
q: Optional[List[str]] = None,
178179
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
179180
"""Retrieve a list of collections from Elasticsearch, supporting pagination.
180181
@@ -183,6 +184,7 @@ async def get_all_collections(
183184
limit (int): The number of results to return.
184185
request (Request): The FastAPI request object.
185186
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
187+
q (Optional[List[str]]): Free text search terms.
186188
187189
Returns:
188190
A tuple of (collections, next pagination token if any).
@@ -223,6 +225,38 @@ async def get_all_collections(
223225
if token:
224226
body["search_after"] = [token]
225227

228+
# Apply free text query if provided
229+
if q:
230+
# For collections, we want to search across all relevant fields
231+
should_clauses = []
232+
233+
# For each search term
234+
for term in q:
235+
# Create a case-insensitive wildcard (substring) query for each term/field pair
236+
for field in [
237+
"id",
238+
"title",
239+
"description",
240+
"keywords",
241+
"summaries.platform",
242+
"summaries.constellation",
243+
"providers.name",
244+
"providers.url",
245+
]:
246+
should_clauses.append(
247+
{
248+
"wildcard": {
249+
field: {"value": f"*{term}*", "case_insensitive": True}
250+
}
251+
}
252+
)
253+
254+
# Add the query to the body using bool query with should clauses
255+
body["query"] = {
256+
"bool": {"should": should_clauses, "minimum_should_match": 1}
257+
}
258+
259+
# Execute the search
226260
response = await self.client.search(
227261
index=COLLECTIONS_INDEX,
228262
body=body,

stac_fastapi/opensearch/stac_fastapi/opensearch/app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@
3939
)
4040
from stac_fastapi.extensions.core.fields import FieldsConformanceClasses
4141
from stac_fastapi.extensions.core.filter import FilterConformanceClasses
42-
43-
# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
42+
from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
4443
from stac_fastapi.extensions.core.query import QueryConformanceClasses
4544
from stac_fastapi.extensions.core.sort import SortConformanceClasses
4645
from stac_fastapi.extensions.third_party import BulkTransactionExtension
@@ -127,7 +126,7 @@
127126
# CollectionSearchFilterExtension(
128127
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
129128
# ),
130-
# FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
129+
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
131130
]
132131

133132
# Initialize collection search with its extensions

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ async def get_all_collections(
159159
limit: int,
160160
request: Request,
161161
sort: Optional[List[Dict[str, Any]]] = None,
162+
q: Optional[List[str]] = None,
162163
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
163164
"""Retrieve a list of collections from OpenSearch, supporting pagination.
164165
@@ -167,6 +168,7 @@ async def get_all_collections(
167168
limit (int): The number of results to return.
168169
request (Request): The FastAPI request object.
169170
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
171+
q (Optional[List[str]]): Free text search terms.
170172
171173
Returns:
172174
A tuple of (collections, next pagination token if any).
@@ -207,6 +209,37 @@ async def get_all_collections(
207209
if token:
208210
body["search_after"] = [token]
209211

212+
# Apply free text query if provided
213+
if q:
214+
# For collections, we want to search across all relevant fields
215+
should_clauses = []
216+
217+
# For each search term
218+
for term in q:
219+
# Create a case-insensitive wildcard (substring) query for each term/field pair
220+
for field in [
221+
"id",
222+
"title",
223+
"description",
224+
"keywords",
225+
"summaries.platform",
226+
"summaries.constellation",
227+
"providers.name",
228+
"providers.url",
229+
]:
230+
should_clauses.append(
231+
{
232+
"wildcard": {
233+
field: {"value": f"*{term}*", "case_insensitive": True}
234+
}
235+
}
236+
)
237+
238+
# Add the query to the body using bool query with should clauses
239+
body["query"] = {
240+
"bool": {"should": should_clauses, "minimum_should_match": 1}
241+
}
242+
210243
response = await self.client.search(
211244
index=COLLECTIONS_INDEX,
212245
body=body,

stac_fastapi/tests/api/test_api_search_collections.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,77 @@ async def test_collections_fields(app_client, txn_client, load_test_data):
152152
assert "title" in collection
153153
assert "description" not in collection
154154
assert "links" in collection
155+
156+
157+
@pytest.mark.asyncio
158+
async def test_collections_free_text_search_get(app_client, txn_client, load_test_data):
159+
"""Verify GET /collections honors the q parameter for free text search."""
160+
# Create multiple collections with different content
161+
base_collection = load_test_data("test_collection.json")
162+
163+
# Use unique prefixes to avoid conflicts between tests
164+
test_prefix = f"q-get-{uuid.uuid4().hex[:8]}"
165+
166+
# Create collections with different content to test free text search
167+
test_collections = [
168+
{
169+
"id": f"{test_prefix}-sentinel",
170+
"title": "Sentinel-2 Collection",
171+
"description": "Collection of Sentinel-2 data",
172+
"summaries": {"platform": ["sentinel-2a", "sentinel-2b"]},
173+
},
174+
{
175+
"id": f"{test_prefix}-landsat",
176+
"title": "Landsat Collection",
177+
"description": "Collection of Landsat data",
178+
"summaries": {"platform": ["landsat-8", "landsat-9"]},
179+
},
180+
{
181+
"id": f"{test_prefix}-modis",
182+
"title": "MODIS Collection",
183+
"description": "Collection of MODIS data",
184+
"summaries": {"platform": ["terra", "aqua"]},
185+
},
186+
]
187+
188+
for i, coll in enumerate(test_collections):
189+
test_collection = base_collection.copy()
190+
test_collection["id"] = coll["id"]
191+
test_collection["title"] = coll["title"]
192+
test_collection["description"] = coll["description"]
193+
test_collection["summaries"] = coll["summaries"]
194+
await create_collection(txn_client, test_collection)
195+
196+
# Test free text search for "sentinel"
197+
resp = await app_client.get(
198+
"/collections",
199+
params=[("q", "sentinel")],
200+
)
201+
assert resp.status_code == 200
202+
resp_json = resp.json()
203+
204+
# Filter collections to only include the ones we created for this test
205+
found_collections = [
206+
c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
207+
]
208+
209+
# Should only find the sentinel collection
210+
assert len(found_collections) == 1
211+
assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
212+
213+
# Test free text search for "modis"
214+
resp = await app_client.get(
215+
"/collections",
216+
params=[("q", "modis")],
217+
)
218+
assert resp.status_code == 200
219+
resp_json = resp.json()
220+
221+
# Filter collections to only include the ones we created for this test
222+
found_collections = [
223+
c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
224+
]
225+
226+
# Should only find the modis collection
227+
assert len(found_collections) == 1
228+
assert found_collections[0]["id"] == f"{test_prefix}-modis"

0 commit comments

Comments
 (0)