Commit b585819

filter scratch

1 parent 57afb55 · commit b585819

3 files changed: +185 −7 lines

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 22 additions & 2 deletions
@@ -228,6 +228,7 @@ async def all_collections(
         self,
         fields: Optional[List[str]] = None,
         sortby: Optional[str] = None,
+        filter_expr: Optional[str] = None,
         q: Optional[Union[str, List[str]]] = None,
         **kwargs,
     ) -> stac_types.Collections:
@@ -236,12 +237,14 @@ async def all_collections(
         Args:
             fields (Optional[List[str]]): Fields to include or exclude from the results.
             sortby (Optional[str]): Sorting options for the results.
-            q (Optional[List[str]]): Free text search terms.
+            filter_expr (Optional[str]): Structured filter in CQL2 format.
+            q (Optional[Union[str, List[str]]]): Free text search terms.
             **kwargs: Keyword arguments from the request.
 
         Returns:
             A Collections object containing all the collections in the database and links to various resources.
         """
+        print("filter: ", filter_expr)
         request = kwargs["request"]
         base_url = str(request.base_url)
         limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10)))
@@ -276,8 +279,25 @@ async def all_collections(
         if q is not None:
             q_list = [q] if isinstance(q, str) else q
 
+        # Parse the filter parameter if provided
+        parsed_filter = None
+        if filter_expr is not None:
+            try:
+                import orjson
+
+                parsed_filter = orjson.loads(filter_expr)
+            except Exception as e:
+                raise HTTPException(
+                    status_code=400, detail=f"Invalid filter parameter: {e}"
+                )
+
         collections, next_token = await self.database.get_all_collections(
-            token=token, limit=limit, request=request, sort=sort, q=q_list
+            token=token,
+            limit=limit,
+            request=request,
+            sort=sort,
+            q=q_list,
+            filter=parsed_filter,
         )
 
         # Apply field filtering if fields parameter was provided
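
For context, `filter_expr` arrives on GET /collections as a raw CQL2 JSON string, and the handler turns it into a dict before handing it to the database layer. Below is a minimal standalone sketch of just that parsing step, assuming `orjson` is installed and `HTTPException` is FastAPI's, mirroring the 400 behavior in the diff:

from typing import Any, Dict, Optional

import orjson
from fastapi import HTTPException


def parse_filter_expr(filter_expr: Optional[str]) -> Optional[Dict[str, Any]]:
    """Parse a CQL2 JSON filter string into a dict, or return None when absent."""
    if filter_expr is None:
        return None
    try:
        return orjson.loads(filter_expr)
    except Exception as e:
        # Same 400 response shape as the all_collections handler above
        raise HTTPException(status_code=400, detail=f"Invalid filter parameter: {e}")


# Example: the equality filter exercised by the new test further down
parsed = parse_filter_expr('{"op": "=", "args": [{"property": "id"}, "my-collection"]}')
assert parsed == {"op": "=", "args": [{"property": "id"}, "my-collection"]}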

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 97 additions & 4 deletions
@@ -176,6 +176,7 @@ async def get_all_collections(
         request: Request,
         sort: Optional[List[Dict[str, Any]]] = None,
         q: Optional[List[str]] = None,
+        filter: Optional[Dict[str, Any]] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
         """Retrieve a list of collections from Elasticsearch, supporting pagination.
 
@@ -185,6 +186,7 @@ async def get_all_collections(
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
             q (Optional[List[str]]): Free text search terms.
+            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
 
         Returns:
             A tuple of (collections, next pagination token if any).
@@ -225,6 +227,9 @@ async def get_all_collections(
         if token:
             body["search_after"] = [token]
 
+        # Build the query part of the body
+        query_parts = []
+
         # Apply free text query if provided
         if q:
             # For collections, we want to search across all relevant fields
@@ -251,10 +256,98 @@ async def get_all_collections(
                 }
             )
 
-            # Add the query to the body using bool query with should clauses
-            body["query"] = {
-                "bool": {"should": should_clauses, "minimum_should_match": 1}
-            }
+            # Add the free text query to the query parts
+            query_parts.append(
+                {"bool": {"should": should_clauses, "minimum_should_match": 1}}
+            )
+
+        # Apply structured filter if provided
+        if filter:
+            try:
+                # For simple direct query handling without using to_es
+                # This is a simplified approach that handles common filter patterns
+                if isinstance(filter, dict):
+                    # Check if this is a CQL2 filter with op and args
+                    if "op" in filter and "args" in filter:
+                        op = filter.get("op")
+                        args = filter.get("args")
+
+                        # Handle equality operator
+                        if (
+                            op == "="
+                            and len(args) == 2
+                            and isinstance(args[0], dict)
+                            and "property" in args[0]
+                        ):
+                            field = args[0]["property"]
+                            value = args[1]
+
+                            # Handle different field types
+                            if field == "id":
+                                # Direct match on ID field
+                                query_parts.append({"term": {"id": value}})
+                            elif field == "title":
+                                # Match on title field
+                                query_parts.append({"match": {"title": value}})
+                            elif field == "description":
+                                # Match on description field
+                                query_parts.append({"match": {"description": value}})
+                            else:
+                                # For other fields, try a multi-match query
+                                query_parts.append(
+                                    {
+                                        "multi_match": {
+                                            "query": value,
+                                            "fields": [field, f"{field}.*"],
+                                            "type": "best_fields",
+                                        }
+                                    }
+                                )
+
+                        # Handle regex operator
+                        elif (
+                            op == "=~"
+                            and len(args) == 2
+                            and isinstance(args[0], dict)
+                            and "property" in args[0]
+                        ):
+                            field = args[0]["property"]
+                            pattern = args[1].replace(".*", "*")
+
+                            # Use wildcard query for pattern matching
+                            query_parts.append(
+                                {
+                                    "wildcard": {
+                                        field: {
+                                            "value": pattern,
+                                            "case_insensitive": True,
+                                        }
+                                    }
+                                }
+                            )
+
+                        # For other operators, use a match_all query as fallback
+                        else:
+                            query_parts.append({"match_all": {}})
+                    else:
+                        # Not a valid CQL2 filter
+                        query_parts.append({"match_all": {}})
+                else:
+                    # Not a dictionary
+                    query_parts.append({"match_all": {}})
+            except Exception as e:
+                logger = logging.getLogger(__name__)
+                logger.error(f"Error converting filter to Elasticsearch: {e}")
+                # If there's an error, add a query that matches nothing
+                query_parts.append({"bool": {"must_not": {"match_all": {}}}})
+                raise
+
+        # Combine all query parts with AND logic if there are multiple
+        if query_parts:
+            if len(query_parts) == 1:
+                body["query"] = query_parts[0]
+            else:
+                body["query"] = {"bool": {"must": query_parts}}
 
         # Execute the search
         response = await self.client.search(
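
To make the combination logic concrete: `query_parts` collects one clause from the free-text `q` handling and one from the CQL2 filter, and when both are present they are wrapped in a top-level bool/must (AND). The shape below is illustrative only; the actual `should` clauses depend on which free-text fields the method searches:

# Approximate shape of body["query"] when both q=["sentinel"] and an id filter are set
example_body_query = {
    "bool": {
        "must": [
            # from q=["sentinel"]: free-text should-clauses (abbreviated here)
            {
                "bool": {
                    "should": [{"match": {"title": "sentinel"}}],
                    "minimum_should_match": 1,
                }
            },
            # from filter={"op": "=", "args": [{"property": "id"}, "my-collection"]}
            {"term": {"id": "my-collection"}},
        ]
    }
}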

stac_fastapi/tests/api/test_api_search_collections.py

Lines changed: 66 additions & 1 deletion
@@ -163,7 +163,7 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_test_data):
     # Use unique prefixes to avoid conflicts between tests
     test_prefix = f"q-get-{uuid.uuid4().hex[:8]}"
 
-    # Create collections with different content to test free text search
+    # Create collections with different content to test structured filter
     test_collections = [
         {
             "id": f"{test_prefix}-sentinel",
@@ -226,3 +226,68 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_test_data):
     # Should only find the landsat collection
     assert len(found_collections) == 1
     assert found_collections[0]["id"] == f"{test_prefix}-modis"
+
+
+@pytest.mark.asyncio
+async def test_collections_filter_search(app_client, txn_client, load_test_data):
+    """Verify GET /collections honors the filter parameter for structured search."""
+    # Create multiple collections with different content
+    base_collection = load_test_data("test_collection.json")
+
+    # Use unique prefixes to avoid conflicts between tests
+    test_prefix = f"filter-{uuid.uuid4().hex[:8]}"
+
+    # Create collections with different content to test structured filter
+    test_collections = [
+        {
+            "id": f"{test_prefix}-sentinel",
+            "title": "Sentinel-2 Collection",
+            "description": "Collection of Sentinel-2 data",
+            "summaries": {"platform": ["sentinel-2a", "sentinel-2b"]},
+        },
+        {
+            "id": f"{test_prefix}-landsat",
+            "title": "Landsat Collection",
+            "description": "Collection of Landsat data",
+            "summaries": {"platform": ["landsat-8", "landsat-9"]},
+        },
+        {
+            "id": f"{test_prefix}-modis",
+            "title": "MODIS Collection",
+            "description": "Collection of MODIS data",
+            "summaries": {"platform": ["terra", "aqua"]},
+        },
+    ]
+
+    for i, coll in enumerate(test_collections):
+        test_collection = base_collection.copy()
+        test_collection["id"] = coll["id"]
+        test_collection["title"] = coll["title"]
+        test_collection["description"] = coll["description"]
+        test_collection["summaries"] = coll["summaries"]
+        await create_collection(txn_client, test_collection)
+
+    # Test structured filter for collections with specific ID
+    import json
+
+    # Create a simple filter for exact ID match - similar to what works in Postman
+    filter_expr = {"op": "=", "args": [{"property": "id"}, f"{test_prefix}-sentinel"]}
+
+    # Convert to JSON string for URL parameter
+    filter_json = json.dumps(filter_expr)
+
+    # Use the exact format that works in Postman
+    resp = await app_client.get(
+        f"/collections?filter={filter_json}",
+    )
+    assert resp.status_code == 200
+    resp_json = resp.json()
+
+    # Filter collections to only include the ones we created for this test
+    found_collections = [
+        c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
+    ]
+
+    # Should only find the sentinel collection
+    assert len(found_collections) == 1
+    assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
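
The test passes the JSON filter directly in the URL, which the test client accepts; a real HTTP client would normally percent-encode it. A hypothetical client-side call against a running instance (the host and port are illustrative, not part of this change):

import json
from urllib.parse import quote

import httpx

filter_expr = {"op": "=", "args": [{"property": "id"}, "my-collection"]}
url = "http://localhost:8080/collections?filter=" + quote(json.dumps(filter_expr))

resp = httpx.get(url)
resp.raise_for_status()
matching_ids = [c["id"] for c in resp.json()["collections"]]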
