query ext scratch

jonhealy1 · jonhealy1 · commit 038db368399b · 2025-09-27T19:53:44.000+08:00
diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py
@@ -231,6 +231,7 @@ async def all_collections(
         filter_expr: Optional[str] = None,
         filter_lang: Optional[str] = None,
         q: Optional[Union[str, List[str]]] = None,
+        query: Optional[str] = None,
         **kwargs,
     ) -> stac_types.Collections:
         """Read all collections from the database.
@@ -239,6 +240,7 @@ async def all_collections(
             fields (Optional[List[str]]): Fields to include or exclude from the results.
             sortby (Optional[str]): Sorting options for the results.
             filter_expr (Optional[str]): Structured filter expression in CQL2 JSON or CQL2-text format.
+            query (Optional[str]): Legacy query parameter (deprecated).
             filter_lang (Optional[str]): Must be 'cql2-json' or 'cql2-text' if specified, other values will result in an error.
             q (Optional[Union[str, List[str]]]): Free text search terms.
             **kwargs: Keyword arguments from the request.
@@ -280,19 +282,32 @@ async def all_collections(
         if q is not None:
             q_list = [q] if isinstance(q, str) else q
 
+        # Parse the query parameter if provided
+        parsed_query = None
+        if query is not None:
+            try:
+                import orjson
+
+                parsed_query = orjson.loads(query)
+            except Exception as e:
+                raise HTTPException(
+                    status_code=400, detail=f"Invalid query parameter: {e}"
+                )
+
         # Parse the filter parameter if provided
         parsed_filter = None
         if filter_expr is not None:
             try:
-                # Check if filter_lang is specified and not one of the supported formats
+                # Only raise an error for explicitly unsupported filter languages
+                # Allow None, cql2-json, and cql2-text (we'll treat it as JSON)
                 if filter_lang is not None and filter_lang not in [
                     "cql2-json",
                     "cql2-text",
                 ]:
                     # Raise an error for unsupported filter languages
                     raise HTTPException(
                         status_code=400,
-                        detail=f"Input should be 'cql2-json' or 'cql2-text' for collections. Got '{filter_lang}'.",
+                        detail=f"Only 'cql2-json' and 'cql2-text' filter languages are supported for collections. Got '{filter_lang}'.",
                     )
 
                 # Handle different filter formats
@@ -335,6 +350,7 @@ async def all_collections(
             sort=sort,
             q=q_list,
             filter=parsed_filter,
+            query=parsed_query,
         )
 
         # Apply field filtering if fields parameter was provided
diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
@@ -177,6 +177,7 @@ async def get_all_collections(
         sort: Optional[List[Dict[str, Any]]] = None,
         q: Optional[List[str]] = None,
         filter: Optional[Dict[str, Any]] = None,
+        query: Optional[Dict[str, Dict[str, Any]]] = None,
     ) -> Tuple[List[Dict[str, Any]], Optional[str]]:
         """Retrieve a list of collections from Elasticsearch, supporting pagination.
 
@@ -186,7 +187,8 @@ async def get_all_collections(
             request (Request): The FastAPI request object.
             sort (Optional[List[Dict[str, Any]]]): Optional sort parameter from the request.
             q (Optional[List[str]]): Free text search terms.
-            filter (Optional[Dict[str, Any]]): Structured query in CQL2 format.
+            filter (Optional[Dict[str, Any]]): Structured filter in CQL2 format.
+            query (Optional[Dict[str, Dict[str, Any]]]): Query extension parameters.
 
         Returns:
             A tuple of (collections, next pagination token if any).
@@ -270,7 +272,50 @@ async def get_all_collections(
             es_query = filter_module.to_es(await self.get_queryables_mapping(), filter)
             query_parts.append(es_query)
 
-        # Combine all query parts with AND logic
+        # Apply query extension if provided
+        if query:
+            try:
+                # Process each field and operator in the query
+                for field_name, expr in query.items():
+                    for op, value in expr.items():
+                        # Handle different operators
+                        if op == "eq":
+                            # Equality operator
+                            # Use different query types based on field name
+                            if field_name in ["title", "description"]:
+                                # For text fields, use match_phrase for exact phrase matching
+                                query_part = {"match_phrase": {field_name: value}}
+                            else:
+                                # For other fields, use term query for exact matching
+                                query_part = {"term": {field_name: value}}
+                            query_parts.append(query_part)
+                        elif op == "neq":
+                            # Not equal operator
+                            query_part = {
+                                "bool": {"must_not": [{"term": {field_name: value}}]}
+                            }
+                            print(f"Adding neq query part: {query_part}")
+                            query_parts.append(query_part)
+                        elif op in ["lt", "lte", "gt", "gte"]:
+                            # Range operators
+                            query_parts.append({"range": {field_name: {op: value}}})
+                        elif op == "in":
+                            # In operator (value should be a list)
+                            if isinstance(value, list):
+                                query_parts.append({"terms": {field_name: value}})
+                            else:
+                                query_parts.append({"term": {field_name: value}})
+                        elif op == "contains":
+                            # Contains operator for arrays
+                            query_parts.append({"term": {field_name: value}})
+            except Exception as e:
+                logger = logging.getLogger(__name__)
+                logger.error(f"Error converting query to Elasticsearch: {e}")
+                # If there's an error, add a query that matches nothing
+                query_parts.append({"bool": {"must_not": {"match_all": {}}}})
+                raise
+
+        # Combine all query parts with AND logic if there are multiple
         if query_parts:
             body["query"] = (
                 query_parts[0]
diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py
@@ -7,10 +7,10 @@
 
 
 @pytest.mark.asyncio
-async def test_collections_sort_id_asc(app_client, txn_client, load_test_data):
+async def test_collections_sort_id_asc(app_client, txn_client, ctx):
     """Verify GET /collections honors ascending sort on id."""
     # Create multiple collections with different ids
-    base_collection = load_test_data("test_collection.json")
+    base_collection = ctx.collection
 
     # Create collections with ids in a specific order to test sorting
     # Use unique prefixes to avoid conflicts between tests
@@ -23,6 +23,8 @@ async def test_collections_sort_id_asc(app_client, txn_client, load_test_data):
         test_collection["title"] = f"Test Collection {i}"
         await create_collection(txn_client, test_collection)
 
+    await refresh_indices(txn_client)
+
     # Test ascending sort by id
     resp = await app_client.get(
         "/collections",
@@ -44,10 +46,10 @@ async def test_collections_sort_id_asc(app_client, txn_client, load_test_data):
 
 
 @pytest.mark.asyncio
-async def test_collections_sort_id_desc(app_client, txn_client, load_test_data):
+async def test_collections_sort_id_desc(app_client, txn_client, ctx):
     """Verify GET /collections honors descending sort on id."""
     # Create multiple collections with different ids
-    base_collection = load_test_data("test_collection.json")
+    base_collection = ctx.collection
 
     # Create collections with ids in a specific order to test sorting
     # Use unique prefixes to avoid conflicts between tests
@@ -60,6 +62,8 @@ async def test_collections_sort_id_desc(app_client, txn_client, load_test_data):
         test_collection["title"] = f"Test Collection {i}"
         await create_collection(txn_client, test_collection)
 
+    await refresh_indices(txn_client)
+
     # Test descending sort by id
     resp = await app_client.get(
         "/collections",
@@ -81,10 +85,10 @@ async def test_collections_sort_id_desc(app_client, txn_client, load_test_data):
 
 
 @pytest.mark.asyncio
-async def test_collections_fields(app_client, txn_client, load_test_data):
+async def test_collections_fields(app_client, txn_client, ctx):
     """Verify GET /collections honors the fields parameter."""
     # Create multiple collections with different ids
-    base_collection = load_test_data("test_collection.json")
+    base_collection = ctx.collection
 
     # Create collections with ids in a specific order to test fields
     # Use unique prefixes to avoid conflicts between tests
@@ -98,6 +102,8 @@ async def test_collections_fields(app_client, txn_client, load_test_data):
         test_collection["description"] = f"Description for collection {i}"
         await create_collection(txn_client, test_collection)
 
+    await refresh_indices(txn_client)
+
     # Test include fields parameter
     resp = await app_client.get(
         "/collections",
@@ -156,10 +162,10 @@ async def test_collections_fields(app_client, txn_client, load_test_data):
 
 
 @pytest.mark.asyncio
-async def test_collections_free_text_search_get(app_client, txn_client, load_test_data):
+async def test_collections_free_text_search_get(app_client, txn_client, ctx):
     """Verify GET /collections honors the q parameter for free text search."""
     # Create multiple collections with different content
-    base_collection = load_test_data("test_collection.json")
+    base_collection = ctx.collection
 
     # Use unique prefixes to avoid conflicts between tests
     test_prefix = f"q-get-{uuid.uuid4().hex[:8]}"
@@ -193,6 +199,8 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes
         test_collection["summaries"] = coll["summaries"]
         await create_collection(txn_client, test_collection)
 
+    await refresh_indices(txn_client)
+
     # Test free text search for "sentinel"
     resp = await app_client.get(
         "/collections",
@@ -229,10 +237,10 @@ async def test_collections_free_text_search_get(app_client, txn_client, load_tes
 
 
 @pytest.mark.asyncio
-async def test_collections_filter_search(app_client, txn_client, load_test_data):
+async def test_collections_filter_search(app_client, txn_client, ctx):
     """Verify GET /collections honors the filter parameter for structured search."""
     # Create multiple collections with different content
-    base_collection = load_test_data("test_collection.json")
+    base_collection = ctx.collection
 
     # Use unique prefixes to avoid conflicts between tests
     test_prefix = f"filter-{uuid.uuid4().hex[:8]}"
@@ -313,3 +321,123 @@ async def test_collections_filter_search(app_client, txn_client, load_test_data)
     assert (
         len(found_collections) >= 1
     ), f"Expected at least 1 collection with ID {test_collection_id} using LIKE filter"
+
+
+@pytest.mark.asyncio
+async def test_collections_query_extension(app_client, txn_client, ctx):
+    """Verify GET /collections honors query extension eq/neq filters on id."""
+    # Start from the collection provided by ctx; variants are derived from it below
+    base_collection = ctx.collection
+    # Random per-run prefix so these ids cannot collide with other tests' data
+    test_prefix = f"query-ext-{uuid.uuid4().hex[:8]}"
+
+    # Three collections differing in id, title, description and platform summaries
+    test_collections = [
+        {
+            "id": f"{test_prefix}-sentinel",
+            "title": "Sentinel-2 Collection",
+            "description": "Collection of Sentinel-2 data",
+            "summaries": {"platform": ["sentinel-2a", "sentinel-2b"]},
+        },
+        {
+            "id": f"{test_prefix}-landsat",
+            "title": "Landsat Collection",
+            "description": "Collection of Landsat data",
+            "summaries": {"platform": ["landsat-8", "landsat-9"]},
+        },
+        {
+            "id": f"{test_prefix}-modis",
+            "title": "MODIS Collection",
+            "description": "Collection of MODIS data",
+            "summaries": {"platform": ["terra", "aqua"]},
+        },
+    ]
+
+    for i, coll in enumerate(test_collections):
+        test_collection = base_collection.copy()
+        test_collection["id"] = coll["id"]
+        test_collection["title"] = coll["title"]
+        test_collection["description"] = coll["description"]
+        test_collection["summaries"] = coll["summaries"]
+        await create_collection(txn_client, test_collection)
+
+    await refresh_indices(txn_client)
+
+    # Case 1: eq on id should match exactly one of the collections created above
+    import json
+
+    # Target the sentinel collection by its full, prefixed id
+    sentinel_id = f"{test_prefix}-sentinel"
+    print(f"Searching for ID: {sentinel_id}")
+
+    query = {"id": {"eq": sentinel_id}}
+
+    resp = await app_client.get(
+        "/collections",
+        params=[("query", json.dumps(query))],
+    )
+    assert resp.status_code == 200
+    resp_json = resp.json()
+
+    # Filter collections to only include the ones we created for this test
+    found_collections = [
+        c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
+    ]
+
+    # Should only find the sentinel collection
+    assert len(found_collections) == 1
+    assert found_collections[0]["id"] == f"{test_prefix}-sentinel"
+
+    # Case 2: same eq query again, also asserting the other two ids are absent
+    query = {"id": {"eq": f"{test_prefix}-sentinel"}}
+
+    resp = await app_client.get(
+        "/collections",
+        params=[("query", json.dumps(query))],
+    )
+    assert resp.status_code == 200
+    resp_json = resp.json()
+
+    # Filter collections to only include the ones we created for this test
+    found_collections = [
+        c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
+    ]
+    found_ids = [c["id"] for c in found_collections]
+
+    # Should find only the sentinel collection, not landsat or modis
+    assert len(found_collections) == 1
+    assert f"{test_prefix}-sentinel" in found_ids
+    assert f"{test_prefix}-landsat" not in found_ids
+    assert f"{test_prefix}-modis" not in found_ids
+
+    # Case 3: neq on id should exclude sentinel and keep the other two
+    query = {"id": {"neq": f"{test_prefix}-sentinel"}}
+
+    print(f"\nTesting neq query: {query}")
+    print(f"JSON query: {json.dumps(query)}")
+
+    resp = await app_client.get(
+        "/collections",
+        params=[("query", json.dumps(query))],
+    )
+    print(f"Response status: {resp.status_code}")
+    assert resp.status_code == 200
+    resp_json = resp.json()
+    print(f"Response JSON keys: {resp_json.keys()}")
+    print(f"Number of collections in response: {len(resp_json.get('collections', []))}")
+
+    # Debug output: every collection id returned, before filtering by test_prefix
+    all_ids = [c["id"] for c in resp_json.get("collections", [])]
+    print(f"All collection IDs in response: {all_ids}")
+
+    # Filter collections to only include the ones we created for this test
+    found_collections = [
+        c for c in resp_json["collections"] if c["id"].startswith(test_prefix)
+    ]
+    found_ids = [c["id"] for c in found_collections]
+
+    # Expect landsat + modis, not sentinel (assumes one page holds both; TODO confirm)
+    assert len(found_collections) == 2
+    assert f"{test_prefix}-sentinel" not in found_ids
+    assert f"{test_prefix}-landsat" in found_ids
+    assert f"{test_prefix}-modis" in found_ids