Support cql2-text filters for collection search

jonhealy1 · jonhealy1 · commit dc31907dc414 · 2025-09-27T00:02:55.000+08:00
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py
@@ -248,7 +248,6 @@ async def all_collections(
         Returns:
             A Collections object containing all the collections in the database and links to various resources.
         """
-        print("filter: ", filter_expr)
         request = kwargs["request"]
         base_url = str(request.base_url)
         limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10)))
@@ -287,18 +286,45 @@ async def all_collections(
         parsed_filter = None
         if filter_expr is not None:
             try:
-                import orjson
-
-                # Check if filter_lang is specified and not cql2-json
-                if filter_lang is not None and filter_lang != "cql2-json":
+                # Check if filter_lang is specified and not one of the supported formats
+                if filter_lang is not None and filter_lang not in [
+                    "cql2-json",
+                    "cql2-text",
+                ]:
                     # Raise an error for unsupported filter languages
                     raise HTTPException(
                         status_code=400,
-                        detail=f"Only 'cql2-json' filter language is supported for collections. Got '{filter_lang}'.",
+                        detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.",
                     )
 
-                # For GET requests, we only handle cql2-json
-                parsed_filter = orjson.loads(unquote_plus(filter_expr))
+                # Handle different filter formats
+                try:
+                    if filter_lang == "cql2-text" or filter_lang is None:
+                        # For cql2-text or when no filter_lang is specified, try both formats
+                        try:
+                            # First try to parse as JSON
+                            parsed_filter = orjson.loads(unquote_plus(filter_expr))
+                        except Exception:
+                            # If that fails, use pygeofilter to convert CQL2-text to CQL2-JSON
+                            try:
+                                # Parse CQL2-text and convert to CQL2-JSON
+                                text_filter = unquote_plus(filter_expr)
+                                parsed_ast = parse_cql2_text(text_filter)
+                                parsed_filter = to_cql2(parsed_ast)
+                            except Exception as e:
+                                # If parsing fails, provide a helpful error message
+                                raise HTTPException(
+                                    status_code=400,
+                                    detail=f"Invalid CQL2-text filter: {e}. Please check your syntax.",
+                                )
+                    else:
+                        # For explicit cql2-json, parse as JSON
+                        parsed_filter = orjson.loads(unquote_plus(filter_expr))
+                except Exception as e:
+                    # Catch any other parsing errors
+                    raise HTTPException(
+                        status_code=400, detail=f"Error parsing filter: {e}"
+                    )
             except Exception as e:
                 raise HTTPException(
                     status_code=400, detail=f"Invalid filter parameter: {e}"
diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
@@ -263,91 +263,20 @@ async def get_all_collections(
 
         # Apply structured filter if provided
         if filter:
-            try:
-                # For simple direct query handling without using to_es
-                # This is a simplified approach that handles common filter patterns
-                if isinstance(filter, dict):
-                    # Check if this is a CQL2 filter with op and args
-                    if "op" in filter and "args" in filter:
-                        op = filter.get("op")
-                        args = filter.get("args")
-
-                        # Handle equality operator
-                        if (
-                            op == "="
-                            and len(args) == 2
-                            and isinstance(args[0], dict)
-                            and "property" in args[0]
-                        ):
-                            field = args[0]["property"]
-                            value = args[1]
-
-                            # Handle different field types
-                            if field == "id":
-                                # Direct match on ID field
-                                query_parts.append({"term": {"id": value}})
-                            elif field == "title":
-                                # Match on title field
-                                query_parts.append({"match": {"title": value}})
-                            elif field == "description":
-                                # Match on description field
-                                query_parts.append({"match": {"description": value}})
-                            else:
-                                # For other fields, try a multi-match query
-                                query_parts.append(
-                                    {
-                                        "multi_match": {
-                                            "query": value,
-                                            "fields": [field, f"{field}.*"],
-                                            "type": "best_fields",
-                                        }
-                                    }
-                                )
-
-                        # Handle regex operator
-                        elif (
-                            op == "=~"
-                            and len(args) == 2
-                            and isinstance(args[0], dict)
-                            and "property" in args[0]
-                        ):
-                            field = args[0]["property"]
-                            pattern = args[1].replace(".*", "*")
-
-                            # Use wildcard query for pattern matching
-                            query_parts.append(
-                                {
-                                    "wildcard": {
-                                        field: {
-                                            "value": pattern,
-                                            "case_insensitive": True,
-                                        }
-                                    }
-                                }
-                            )
-
-                        # For other operators, use a match_all query as fallback
-                        else:
-                            query_parts.append({"match_all": {}})
-                    else:
-                        # Not a valid CQL2 filter
-                        query_parts.append({"match_all": {}})
-                else:
-                    # Not a dictionary
-                    query_parts.append({"match_all": {}})
-            except Exception as e:
-                logger = logging.getLogger(__name__)
-                logger.error(f"Error converting filter to Elasticsearch: {e}")
-                # If there's an error, add a query that matches nothing
-                query_parts.append({"bool": {"must_not": {"match_all": {}}}})
-                raise
-
-        # Combine all query parts with AND logic if there are multiple
+            # Convert string filter to dict if needed
+            if isinstance(filter, str):
+                filter = orjson.loads(filter)
+            # Convert the filter to an Elasticsearch query using the filter module
+            es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
+            query_parts.append(es_query)
+
+        # Combine all query parts with AND logic
         if query_parts:
-            if len(query_parts) == 1:
-                body["query"] = query_parts[0]
-            else:
-                body["query"] = {"bool": {"must": query_parts}}
+            body["query"] = (
+                query_parts[0]
+                if len(query_parts) == 1
+                else {"bool": {"must": query_parts}}
+            )
 
         # Execute the search
         response = await self.client.search(
diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
@@ -160,6 +160,7 @@ async def get_all_collections(
         request: Request,
         sort: Optional[List[Dict[str, Any]]] = None,
         q: Optional[List[str]] = None,
+        filter: Optional[Dict[str, Any]] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
         """Retrieve a list of collections from Elasticsearch, supporting pagination.
 
@@ -169,6 +170,7 @@ async def get_all_collections(
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
             q (Optional[List[str]]): Free text search terms.
+            filter (Optional[Dict[str, Any]]): Structured CQL2-JSON filter; a JSON string is also accepted and decoded before use.
 
         Returns:
             A tuple of (collections, next pagination token if any).
@@ -191,7 +193,7 @@ async def get_all_collections(
                         raise HTTPException(
                             status_code=400,
                             detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. "
-                            + "Text fields are not sortable by default in OpenSearch. "
+                            + "Text fields are not sortable by default in Elasticsearch. "
                             + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ",
                         )
                     formatted_sort.append({field: {"order": direction}})
@@ -209,6 +211,9 @@ async def get_all_collections(
         if token:
             body["search_after"] = [token]
 
+        # Build the query part of the body
+        query_parts = []
+
         # Apply free text query if provided
         if q:
             # For collections, we want to search across all relevant fields
@@ -235,11 +240,29 @@ async def get_all_collections(
                         }
                     )
 
-            # Add the query to the body using bool query with should clauses
-            body["query"] = {
-                "bool": {"should": should_clauses, "minimum_should_match": 1}
-            }
+            # Add the free text query to the query parts
+            query_parts.append(
+                {"bool": {"should": should_clauses, "minimum_should_match": 1}}
+            )
 
+        # Apply structured filter if provided
+        if filter:
+            # Convert string filter to dict if needed
+            if isinstance(filter, str):
+                filter = orjson.loads(filter)
+            # Convert the filter to an OpenSearch query using the filter module
+            es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
+            query_parts.append(es_query)
+
+        # Combine all query parts with AND logic
+        if query_parts:
+            body["query"] = (
+                query_parts[0]
+                if len(query_parts) == 1
+                else {"bool": {"must": query_parts}}
+            )
+
+        # Execute the search
         response = await self.client.search(
             index=COLLECTIONS_INDEX,
             body=body,
@@ -255,7 +278,6 @@ async def get_all_collections(
 
         next_token = None
         if len(hits) == limit:
-            # Ensure we have a valid sort value for next_token
             next_token_values = hits[-1].get("sort")
             if next_token_values:
                 next_token = next_token_values[0]
@@ -580,6 +602,10 @@ async def apply_cql2_filter(
                     otherwise the original Search object.
         """
         if _filter is not None:
+            if isinstance(_filter, str):
+                import json
+
+                _filter = json.loads(_filter)
             es_query = filter_module.to_es(await self.get_queryables_mapping(), _filter)
             search = search.filter(es_query)
 
diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py
@@ -267,27 +267,55 @@ async def test_collections_filter_search(app_client, txn_client, load_test_data)
         test_collection["summaries"] = coll["summaries"]
         await create_collection(txn_client, test_collection)
 
-    # Test structured filter for collections with specific ID
+    # Ensure collections are searchable
+    from ..conftest import refresh_indices
+
+    await refresh_indices(txn_client)
+
+    # Test 1: CQL2-JSON format - filter for one of our test collections
     import json
 
-    # Create a simple filter for exact ID match - similar to what works in Postman
-    filter_expr = {"op": "=", "args": [{"property": "id"}, f"{test_prefix}-sentinel"]}
+    # Use the ID of the first test collection for the filter
+    test_collection_id = test_collections[0]["id"]
+
+    # Create a simple filter for exact ID match using CQL2-JSON
+    filter_expr = {"op": "=", "args": [{"property": "id"}, test_collection_id]}
 
     # Convert to JSON string for URL parameter
     filter_json = json.dumps(filter_expr)
 
-    # Use the exact format that works in Postman
+    # Use CQL2-JSON format with explicit filter-lang
     resp = await app_client.get(
-        f"/collections?filter={filter_json}",
+        f"/collections?filter={filter_json}&filter-lang=cql2-json",
     )
+
     assert resp.status_code == 200
     resp_json = resp.json()
 
-    # Filter collections to only include the ones we created for this test
+    # Should find exactly one collection with the specified ID
     found_collections = [
-        c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
+        c for c in resp_json["collections"] if c["id"] == test_collection_id
     ]
 
-    # Should only find the sentinel collection
-    assert len(found_collections) == 1
-    assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
+    assert (
+        len(found_collections) == 1
+    ), f"Expected 1 collection with ID {test_collection_id}, found {len(found_collections)}"
+    assert found_collections[0]["id"] == test_collection_id
+
+    # Test 2: CQL2-text format with LIKE operator for more advanced filtering
+    # Use a filter that will match the test collection ID we created
+    filter_text = f"id LIKE '%{test_collection_id.split('-')[-1]}%'"
+
+    resp = await app_client.get(
+        f"/collections?filter={filter_text}&filter-lang=cql2-text",
+    )
+    assert resp.status_code == 200
+    resp_json = resp.json()
+
+    # Should find the test collection we created
+    found_collections = [
+        c for c in resp_json["collections"] if c["id"] == test_collection_id
+    ]
+    assert (
+        len(found_collections) >= 1
+    ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter"