Skip to content

Commit 84c1089

Browse files
jonhealy1 authored and Yuri Zmytrakov committed
GET /collections search free text extension (#470)
**Related Issue(s):**
- #460

**Description:** ex. `/collections?q=Sentinel-2a`

**PR Checklist:**
- [x] Code is formatted and linted (run `pre-commit run --all-files`)
- [x] Tests pass (run `make test`)
- [x] Documentation has been updated to reflect changes, if applicable
- [x] Changes are added to the changelog
1 parent ce672b0 commit 84c1089

File tree

8 files changed

+163
-8
lines changed

8 files changed

+163
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- GET `/collections` collection search free text extension ex. `/collections?q=sentinel`. [#470](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/470)
1213
- Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452)
1314
- GET `/collections` collection search sort extension ex. `/collections?sortby=+id`. [#456](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/456)
1415
- GET `/collections` collection search fields extension ex. `/collections?fields=id,title`. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465)

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ SFEOS implements extended capabilities for the `/collections` endpoint, allowing
126126
- Example: `/collections?fields=id,title,description`
127127
- This helps reduce payload size when only certain fields are needed
128128

129+
- **Free Text Search**: Search across collection text fields using the `q` parameter
130+
- Example: `/collections?q=landsat`
131+
- Searches across multiple text fields including title, description, and keywords
132+
- Supports partial word matching and relevance-based sorting
133+
129134
These extensions make it easier to build user interfaces that display and navigate through collections efficiently.
130135

131136
> **Configuration**: Collection search extensions can be disabled by setting the `ENABLE_COLLECTIONS_SEARCH` environment variable to `false`. By default, these extensions are enabled.

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,18 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage:
230230
return landing_page
231231

232232
async def all_collections(
233-
self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, **kwargs
233+
self,
234+
fields: Optional[List[str]] = None,
235+
sortby: Optional[str] = None,
236+
q: Optional[Union[str, List[str]]] = None,
237+
**kwargs,
234238
) -> stac_types.Collections:
235239
"""Read all collections from the database.
236240
237241
Args:
238242
fields (Optional[List[str]]): Fields to include or exclude from the results.
239243
sortby (Optional[str]): Sorting options for the results.
244+
q (Optional[Union[str, List[str]]]): Free text search term(s); a single string is wrapped into a one-element list before querying.
240245
**kwargs: Keyword arguments from the request.
241246
242247
Returns:
@@ -278,8 +283,13 @@ async def all_collections(
278283
except Exception:
279284
redis = None
280285

286+
# Convert q to a list if it's a string
287+
q_list = None
288+
if q is not None:
289+
q_list = [q] if isinstance(q, str) else q
290+
281291
collections, next_token = await self.database.get_all_collections(
282-
token=token, limit=limit, request=request, sort=sort
292+
token=token, limit=limit, request=request, sort=sort, q=q_list
283293
)
284294

285295
# Apply field filtering if fields parameter was provided

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
4545
)
4646
from stac_fastapi.extensions.core.fields import FieldsConformanceClasses
4747
from stac_fastapi.extensions.core.filter import FilterConformanceClasses
48-
49-
# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
48+
from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
5049
from stac_fastapi.extensions.core.query import QueryConformanceClasses
5150
from stac_fastapi.extensions.core.sort import SortConformanceClasses
5251
from stac_fastapi.extensions.third_party import BulkTransactionExtension
@@ -127,7 +126,7 @@
127126
# CollectionSearchFilterExtension(
128127
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
129128
# ),
130-
# FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
129+
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
131130
]
132131

133132
# Initialize collection search with its extensions

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ async def get_all_collections(
175175
limit: int,
176176
request: Request,
177177
sort: Optional[List[Dict[str, Any]]] = None,
178+
q: Optional[List[str]] = None,
178179
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
179180
"""Retrieve a list of collections from Elasticsearch, supporting pagination.
180181
@@ -183,6 +184,7 @@ async def get_all_collections(
183184
limit (int): The number of results to return.
184185
request (Request): The FastAPI request object.
185186
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
187+
q (Optional[List[str]]): Free text search terms.
186188
187189
Returns:
188190
A tuple of (collections, next pagination token if any).
@@ -223,6 +225,38 @@ async def get_all_collections(
223225
if token:
224226
body["search_after"] = [token]
225227

228+
# Apply free text query if provided
229+
if q:
230+
# For collections, we want to search across all relevant fields
231+
should_clauses = []
232+
233+
# For each search term
234+
for term in q:
235+
# Create a case-insensitive wildcard clause on each searchable field for this term
236+
for field in [
237+
"id",
238+
"title",
239+
"description",
240+
"keywords",
241+
"summaries.platform",
242+
"summaries.constellation",
243+
"providers.name",
244+
"providers.url",
245+
]:
246+
should_clauses.append(
247+
{
248+
"wildcard": {
249+
field: {"value": f"*{term}*", "case_insensitive": True}
250+
}
251+
}
252+
)
253+
254+
# Add the query to the body using bool query with should clauses
255+
body["query"] = {
256+
"bool": {"should": should_clauses, "minimum_should_match": 1}
257+
}
258+
259+
# Execute the search
226260
response = await self.client.search(
227261
index=COLLECTIONS_INDEX,
228262
body=body,

stac_fastapi/opensearch/stac_fastapi/opensearch/app.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@
3939
)
4040
from stac_fastapi.extensions.core.fields import FieldsConformanceClasses
4141
from stac_fastapi.extensions.core.filter import FilterConformanceClasses
42-
43-
# from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
42+
from stac_fastapi.extensions.core.free_text import FreeTextConformanceClasses
4443
from stac_fastapi.extensions.core.query import QueryConformanceClasses
4544
from stac_fastapi.extensions.core.sort import SortConformanceClasses
4645
from stac_fastapi.extensions.third_party import BulkTransactionExtension
@@ -127,7 +126,7 @@
127126
# CollectionSearchFilterExtension(
128127
# conformance_classes=[FilterConformanceClasses.COLLECTIONS]
129128
# ),
130-
# FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
129+
FreeTextExtension(conformance_classes=[FreeTextConformanceClasses.COLLECTIONS]),
131130
]
132131

133132
# Initialize collection search with its extensions

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ async def get_all_collections(
159159
limit: int,
160160
request: Request,
161161
sort: Optional[List[Dict[str, Any]]] = None,
162+
q: Optional[List[str]] = None,
162163
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
163164
"""Retrieve a list of collections from OpenSearch, supporting pagination.
164165
@@ -167,6 +168,7 @@ async def get_all_collections(
167168
limit (int): The number of results to return.
168169
request (Request): The FastAPI request object.
169170
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
171+
q (Optional[List[str]]): Free text search terms.
170172
171173
Returns:
172174
A tuple of (collections, next pagination token if any).
@@ -207,6 +209,37 @@ async def get_all_collections(
207209
if token:
208210
body["search_after"] = [token]
209211

212+
# Apply free text query if provided
213+
if q:
214+
# For collections, we want to search across all relevant fields
215+
should_clauses = []
216+
217+
# For each search term
218+
for term in q:
219+
# Create a case-insensitive wildcard clause on each searchable field for this term
220+
for field in [
221+
"id",
222+
"title",
223+
"description",
224+
"keywords",
225+
"summaries.platform",
226+
"summaries.constellation",
227+
"providers.name",
228+
"providers.url",
229+
]:
230+
should_clauses.append(
231+
{
232+
"wildcard": {
233+
field: {"value": f"*{term}*", "case_insensitive": True}
234+
}
235+
}
236+
)
237+
238+
# Add the query to the body using bool query with should clauses
239+
body["query"] = {
240+
"bool": {"should": should_clauses, "minimum_should_match": 1}
241+
}
242+
210243
response = await self.client.search(
211244
index=COLLECTIONS_INDEX,
212245
body=body,

stac_fastapi/tests/api/test_api_search_collections.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,77 @@ async def test_collections_fields(app_client, txn_client, load_test_data):
152152
assert "title" in collection
153153
assert "description" not in collection
154154
assert "links" in collection
155+
156+
157+
@pytest.mark.asyncio
158+
async def test_collections_free_text_search_get(app_client, txn_client, load_test_data):
159+
"""Verify GET /collections honors the q parameter for free text search."""
160+
# Create multiple collections with different content
161+
base_collection = load_test_data("test_collection.json")
162+
163+
# Use unique prefixes to avoid conflicts between tests
164+
test_prefix = f"q-get-{uuid.uuid4().hex[:8]}"
165+
166+
# Create collections with different content to test free text search
167+
test_collections = [
168+
{
169+
"id": f"{test_prefix}-sentinel",
170+
"title": "Sentinel-2 Collection",
171+
"description": "Collection of Sentinel-2 data",
172+
"summaries": {"platform": ["sentinel-2a", "sentinel-2b"]},
173+
},
174+
{
175+
"id": f"{test_prefix}-landsat",
176+
"title": "Landsat Collection",
177+
"description": "Collection of Landsat data",
178+
"summaries": {"platform": ["landsat-8", "landsat-9"]},
179+
},
180+
{
181+
"id": f"{test_prefix}-modis",
182+
"title": "MODIS Collection",
183+
"description": "Collection of MODIS data",
184+
"summaries": {"platform": ["terra", "aqua"]},
185+
},
186+
]
187+
188+
for i, coll in enumerate(test_collections):
189+
test_collection = base_collection.copy()
190+
test_collection["id"] = coll["id"]
191+
test_collection["title"] = coll["title"]
192+
test_collection["description"] = coll["description"]
193+
test_collection["summaries"] = coll["summaries"]
194+
await create_collection(txn_client, test_collection)
195+
196+
# Test free text search for "sentinel"
197+
resp = await app_client.get(
198+
"/collections",
199+
params=[("q", "sentinel")],
200+
)
201+
assert resp.status_code == 200
202+
resp_json = resp.json()
203+
204+
# Filter collections to only include the ones we created for this test
205+
found_collections = [
206+
c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
207+
]
208+
209+
# Should only find the sentinel collection
210+
assert len(found_collections) == 1
211+
assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
212+
213+
# Test free text search for "modis"
214+
resp = await app_client.get(
215+
"/collections",
216+
params=[("q", "modis")],
217+
)
218+
assert resp.status_code == 200
219+
resp_json = resp.json()
220+
221+
# Filter collections to only include the ones we created for this test
222+
found_collections = [
223+
c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
224+
]
225+
226+
# Should only find the modis collection
227+
assert len(found_collections) == 1
228+
assert found_collections[0]["id"] == f"{test_prefix}-modis"

0 commit comments

Comments
 (0)