Skip to content

Commit dc31907

Browse files
committed
support cql2-text
1 parent 098872a commit dc31907

File tree

4 files changed

+117
-108
lines changed

4 files changed

+117
-108
lines changed

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,6 @@ async def all_collections(
248248
Returns:
249249
A Collections object containing all the collections in the database and links to various resources.
250250
"""
251-
print("filter: ", filter_expr)
252251
request = kwargs["request"]
253252
base_url = str(request.base_url)
254253
limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10)))
@@ -287,18 +286,45 @@ async def all_collections(
287286
parsed_filter = None
288287
if filter_expr is not None:
289288
try:
290-
import orjson
291-
292-
# Check if filter_lang is specified and not cql2-json
293-
if filter_lang is not None and filter_lang != "cql2-json":
289+
# Check if filter_lang is specified and not one of the supported formats
290+
if filter_lang is not None and filter_lang not in [
291+
"cql2-json",
292+
"cql2-text",
293+
]:
294294
# Raise an error for unsupported filter languages
295295
raise HTTPException(
296296
status_code=400,
297-
detail=f"Only 'cql2-json' filter language is supported for collections. Got '{filter_lang}'.",
297+
detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.",
298298
)
299299

300-
# For GET requests, we only handle cql2-json
301-
parsed_filter = orjson.loads(unquote_plus(filter_expr))
300+
# Handle different filter formats
301+
try:
302+
if filter_lang == "cql2-text" or filter_lang is None:
303+
# For cql2-text or when no filter_lang is specified, try both formats
304+
try:
305+
# First try to parse as JSON
306+
parsed_filter = orjson.loads(unquote_plus(filter_expr))
307+
except Exception:
308+
# If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON
309+
try:
310+
# Parse CQL2-text and convert to CQL2-JSON
311+
text_filter = unquote_plus(filter_expr)
312+
parsed_ast = parse_cql2_text(text_filter)
313+
parsed_filter = to_cql2(parsed_ast)
314+
except Exception as e:
315+
# If parsing fails, provide a helpful error message
316+
raise HTTPException(
317+
status_code=400,
318+
detail=f"Invalid CQL2-text filter: {e}. Please check your syntax.",
319+
)
320+
else:
321+
# For explicit cql2-json, parse as JSON
322+
parsed_filter = orjson.loads(unquote_plus(filter_expr))
323+
except Exception as e:
324+
# Catch any other parsing errors
325+
raise HTTPException(
326+
status_code=400, detail=f"Error parsing filter: {e}"
327+
)
302328
except Exception as e:
303329
raise HTTPException(
304330
status_code=400, detail=f"Invalid filter parameter: {e}"

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 13 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -263,91 +263,20 @@ async def get_all_collections(
263263

264264
# Apply structured filter if provided
265265
if filter:
266-
try:
267-
# For simple direct query handling without using to_es
268-
# This is a simplified approach that handles common filter patterns
269-
if isinstance(filter, dict):
270-
# Check if this is a CQL2 filter with op and args
271-
if "op" in filter and "args" in filter:
272-
op = filter.get("op")
273-
args = filter.get("args")
274-
275-
# Handle equality operator
276-
if (
277-
op == "="
278-
and len(args) == 2
279-
and isinstance(args[0], dict)
280-
and "property" in args[0]
281-
):
282-
field = args[0]["property"]
283-
value = args[1]
284-
285-
# Handle different field types
286-
if field == "id":
287-
# Direct match on ID field
288-
query_parts.append({"term": {"id": value}})
289-
elif field == "title":
290-
# Match on title field
291-
query_parts.append({"match": {"title": value}})
292-
elif field == "description":
293-
# Match on description field
294-
query_parts.append({"match": {"description": value}})
295-
else:
296-
# For other fields, try a multi-match query
297-
query_parts.append(
298-
{
299-
"multi_match": {
300-
"query": value,
301-
"fields": [field, f"{field}.*"],
302-
"type": "best_fields",
303-
}
304-
}
305-
)
306-
307-
# Handle regex operator
308-
elif (
309-
op == "=~"
310-
and len(args) == 2
311-
and isinstance(args[0], dict)
312-
and "property" in args[0]
313-
):
314-
field = args[0]["property"]
315-
pattern = args[1].replace(".*", "*")
316-
317-
# Use wildcard query for pattern matching
318-
query_parts.append(
319-
{
320-
"wildcard": {
321-
field: {
322-
"value": pattern,
323-
"case_insensitive": True,
324-
}
325-
}
326-
}
327-
)
328-
329-
# For other operators, use a match_all query as fallback
330-
else:
331-
query_parts.append({"match_all": {}})
332-
else:
333-
# Not a valid CQL2 filter
334-
query_parts.append({"match_all": {}})
335-
else:
336-
# Not a dictionary
337-
query_parts.append({"match_all": {}})
338-
except Exception as e:
339-
logger = logging.getLogger(__name__)
340-
logger.error(f"Error converting filter to Elasticsearch: {e}")
341-
# If there's an error, add a query that matches nothing
342-
query_parts.append({"bool": {"must_not": {"match_all": {}}}})
343-
raise
344-
345-
# Combine all query parts with AND logic if there are multiple
266+
# Convert string filter to dict if needed
267+
if isinstance(filter, str):
268+
filter = orjson.loads(filter)
269+
# Convert the filter to an Elasticsearch query using the filter module
270+
es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
271+
query_parts.append(es_query)
272+
273+
# Combine all query parts with AND logic
346274
if query_parts:
347-
if len(query_parts) == 1:
348-
body["query"] = query_parts[0]
349-
else:
350-
body["query"] = {"bool": {"must": query_parts}}
275+
body["query"] = (
276+
query_parts[0]
277+
if len(query_parts) == 1
278+
else {"bool": {"must": query_parts}}
279+
)
351280

352281
# Execute the search
353282
response = await self.client.search(

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ async def get_all_collections(
160160
request: Request,
161161
sort: Optional[List[Dict[str, Any]]] = None,
162162
q: Optional[List[str]] = None,
163+
filter: Optional[Dict[str, Any]] = None,
163164
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
164165
"""Retrieve a list of collections from Elasticsearch, supporting pagination.
165166
@@ -169,6 +170,7 @@ async def get_all_collections(
169170
request (Request): The FastAPI request object.
170171
sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
171172
q (Optional[List[str]]): Free text search terms.
173+
filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
172174
173175
Returns:
174176
A tuple of (collections, next pagination token if any).
@@ -191,7 +193,7 @@ async def get_all_collections(
191193
raise HTTPException(
192194
status_code=400,
193195
detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. "
194-
+ "Text fields are not sortable by default in OpenSearch. "
196+
+ "Text fields are not sortable by default in Elasticsearch. "
195197
+ "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ",
196198
)
197199
formatted_sort.append({field: {"order": direction}})
@@ -209,6 +211,9 @@ async def get_all_collections(
209211
if token:
210212
body["search_after"] = [token]
211213

214+
# Build the query part of the body
215+
query_parts = []
216+
212217
# Apply free text query if provided
213218
if q:
214219
# For collections, we want to search across all relevant fields
@@ -235,11 +240,29 @@ async def get_all_collections(
235240
}
236241
)
237242

238-
# Add the query to the body using bool query with should clauses
239-
body["query"] = {
240-
"bool": {"should": should_clauses, "minimum_should_match": 1}
241-
}
243+
# Add the free text query to the query parts
244+
query_parts.append(
245+
{"bool": {"should": should_clauses, "minimum_should_match": 1}}
246+
)
242247

248+
# Apply structured filter if provided
249+
if filter:
250+
# Convert string filter to dict if needed
251+
if isinstance(filter, str):
252+
filter = orjson.loads(filter)
253+
# Convert the filter to an Elasticsearch query using the filter module
254+
es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
255+
query_parts.append(es_query)
256+
257+
# Combine all query parts with AND logic
258+
if query_parts:
259+
body["query"] = (
260+
query_parts[0]
261+
if len(query_parts) == 1
262+
else {"bool": {"must": query_parts}}
263+
)
264+
265+
# Execute the search
243266
response = await self.client.search(
244267
index=COLLECTIONS_INDEX,
245268
body=body,
@@ -255,7 +278,6 @@ async def get_all_collections(
255278

256279
next_token = None
257280
if len(hits) == limit:
258-
# Ensure we have a valid sort value for next_token
259281
next_token_values = hits[-1].get("sort")
260282
if next_token_values:
261283
next_token = next_token_values[0]
@@ -580,6 +602,10 @@ async def apply_cql2_filter(
580602
otherwise the original Search object.
581603
"""
582604
if _filter is not None:
605+
if isinstance(_filter, str):
606+
import json
607+
608+
_filter = json.loads(_filter)
583609
es_query = filter_module.to_es(await self.get_queryables_mapping(), _filter)
584610
search = search.filter(es_query)
585611

stac_fastapi/tests/api/test_api_search_collections.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -267,27 +267,55 @@ async def test_collections_filter_search(app_client, txn_client, load_test_data)
267267
test_collection["summaries"] = coll["summaries"]
268268
await create_collection(txn_client, test_collection)
269269

270-
# Test structured filter for collections with specific ID
270+
# Ensure collections are searchable
271+
from ..conftest import refresh_indices
272+
273+
await refresh_indices(txn_client)
274+
275+
# Test 1: CQL2-JSON format - filter for one of our test collections
271276
import json
272277

273-
# Create a simple filter for exact ID match - similar to what works in Postman
274-
filter_expr = {"op": "=", "args": [{"property": "id"}, f"{test_prefix}-sentinel"]}
278+
# Use the ID of the first test collection for the filter
279+
test_collection_id = test_collections[0]["id"]
280+
281+
# Create a simple filter for exact ID match using CQL2-JSON
282+
filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]}
275283

276284
# Convert to JSON string for URL parameter
277285
filter_json = json.dumps(filter_expr)
278286

279-
# Use the exact format that works in Postman
287+
# Use CQL2-JSON format with explicit filter-lang
280288
resp = await app_client.get(
281-
f"/collections?filter={filter_json}",
289+
f"/collections?filter={filter_json}&filter-lang=cql2-json",
282290
)
291+
283292
assert resp.status_code == 200
284293
resp_json = resp.json()
285294

286-
# Filter collections to only include the ones we created for this test
295+
# Should find exactly one collection with the specified ID
287296
found_collections = [
288-
c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
297+
c for c in resp_json["collections"] if c["id"] == test_collection_id
289298
]
290299

291-
# Should only find the sentinel collection
292-
assert len(found_collections) == 1
293-
assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
300+
assert (
301+
len(found_collections) == 1
302+
), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}"
303+
assert found_collections[0]["id"] == test_collection_id
304+
305+
# Test 2: CQL2-text format with LIKE operator for more advanced filtering
306+
# Use a filter that will match the test collection ID we created
307+
filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'"
308+
309+
resp = await app_client.get(
310+
f"/collections?filter={filter_text}&filter-lang=cql2-text",
311+
)
312+
assert resp.status_code == 200
313+
resp_json = resp.json()
314+
315+
# Should find the test collection we created
316+
found_collections = [
317+
c for c in resp_json["collections"] if c["id"] == test_collection_id
318+
]
319+
assert (
320+
len(found_collections) >= 1
321+
), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter"

0 commit comments

Comments
 (0)