-
Notifications
You must be signed in to change notification settings - Fork 42
fix type for advanced freetext and allow free-text for Item search #263
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
2fd8c8f
bb91b68
c0767f3
bbf0cb5
b5fdaba
947a977
eaf6dd8
af4b331
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,8 +54,7 @@ async def all_collections( # noqa: C901 | |
| sortby: Optional[str] = None, | ||
| filter_expr: Optional[str] = None, | ||
| filter_lang: Optional[str] = None, | ||
| q: Optional[List[str]] = None, | ||
| **kwargs, | ||
| **kwargs: Any, | ||
| ) -> Collections: | ||
| """Cross catalog search (GET). | ||
|
|
||
|
|
@@ -86,9 +85,15 @@ async def all_collections( # noqa: C901 | |
| sortby=sortby, | ||
| filter_query=filter_expr, | ||
| filter_lang=filter_lang, | ||
| q=q, | ||
| **kwargs, | ||
| ) | ||
|
|
||
| # NOTE: `FreeTextExtension` - pgstac will only accept `str` so we need to | ||
| # join the list[str] with ` OR ` | ||
| # ref: https://github.com/stac-utils/stac-fastapi-pgstac/pull/263 | ||
| if q := clean_args.pop("q", None): | ||
| clean_args["q"] = " OR ".join(q) if isinstance(q, list) else q | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need custom code to handle Note: we don't need this in items search because we will use pydantic serialization |
||
|
|
||
| async with request.app.state.get_connection(request, "r") as conn: | ||
| q, p = render( | ||
| """ | ||
|
|
@@ -157,7 +162,10 @@ async def all_collections( # noqa: C901 | |
| ) | ||
|
|
||
| async def get_collection( | ||
| self, collection_id: str, request: Request, **kwargs | ||
| self, | ||
| collection_id: str, | ||
| request: Request, | ||
| **kwargs: Any, | ||
| ) -> Collection: | ||
| """Get collection by id. | ||
|
|
||
|
|
@@ -202,7 +210,9 @@ async def get_collection( | |
| return Collection(**collection) | ||
|
|
||
| async def _get_base_item( | ||
| self, collection_id: str, request: Request | ||
| self, | ||
| collection_id: str, | ||
| request: Request, | ||
| ) -> Dict[str, Any]: | ||
| """Get the base item of a collection for use in rehydrating full item collection properties. | ||
|
|
||
|
|
@@ -359,7 +369,7 @@ async def item_collection( | |
| filter_expr: Optional[str] = None, | ||
| filter_lang: Optional[str] = None, | ||
| token: Optional[str] = None, | ||
| **kwargs, | ||
| **kwargs: Any, | ||
| ) -> ItemCollection: | ||
| """Get all items from a specific collection. | ||
|
|
||
|
|
@@ -391,6 +401,7 @@ async def item_collection( | |
| filter_lang=filter_lang, | ||
| fields=fields, | ||
| sortby=sortby, | ||
| **kwargs, | ||
| ) | ||
|
|
||
| try: | ||
|
|
@@ -417,7 +428,11 @@ async def item_collection( | |
| return ItemCollection(**item_collection) | ||
|
|
||
| async def get_item( | ||
| self, item_id: str, collection_id: str, request: Request, **kwargs | ||
| self, | ||
| item_id: str, | ||
| collection_id: str, | ||
| request: Request, | ||
| **kwargs: Any, | ||
| ) -> Item: | ||
| """Get item by id. | ||
|
|
||
|
|
@@ -445,7 +460,10 @@ async def get_item( | |
| return Item(**item_collection["features"][0]) | ||
|
|
||
| async def post_search( | ||
| self, search_request: PgstacSearch, request: Request, **kwargs | ||
| self, | ||
| search_request: PgstacSearch, | ||
| request: Request, | ||
| **kwargs: Any, | ||
| ) -> ItemCollection: | ||
| """Cross catalog search (POST). | ||
|
|
||
|
|
@@ -489,7 +507,7 @@ async def get_search( | |
| filter_expr: Optional[str] = None, | ||
| filter_lang: Optional[str] = None, | ||
| token: Optional[str] = None, | ||
| **kwargs, | ||
| **kwargs: Any, | ||
| ) -> ItemCollection: | ||
| """Cross catalog search (GET). | ||
|
|
||
|
|
@@ -516,6 +534,7 @@ async def get_search( | |
| sortby=sortby, | ||
| filter_query=filter_expr, | ||
| filter_lang=filter_lang, | ||
| **kwargs, | ||
| ) | ||
|
|
||
| try: | ||
|
|
@@ -550,7 +569,8 @@ def _clean_search_args( # noqa: C901 | |
| sortby: Optional[str] = None, | ||
| filter_query: Optional[str] = None, | ||
| filter_lang: Optional[str] = None, | ||
| q: Optional[List[str]] = None, | ||
| q: Optional[Union[str, List[str]]] = None, | ||
| **kwargs: Any, | ||
| ) -> Dict[str, Any]: | ||
| """Clean up search arguments to match format expected by pgstac""" | ||
| if filter_query: | ||
|
|
@@ -596,7 +616,7 @@ def _clean_search_args( # noqa: C901 | |
| base_args["fields"] = {"include": includes, "exclude": excludes} | ||
|
|
||
| if q: | ||
| base_args["q"] = " OR ".join(q) | ||
| base_args["q"] = q | ||
|
|
||
| # Remove None values from dict | ||
| clean = {} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| """pgstac extension customisations.""" | ||
|
|
||
| from .filter import FiltersClient | ||
| from .free_text import FreeTextExtension | ||
| from .query import QueryExtension | ||
|
|
||
| __all__ = ["QueryExtension", "FiltersClient"] | ||
| __all__ = ["QueryExtension", "FiltersClient", "FreeTextExtension"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| """Free-Text model for PgSTAC.""" | ||
|
|
||
| from typing import List, Optional | ||
|
|
||
| from pydantic import BaseModel, Field | ||
| from pydantic.functional_serializers import PlainSerializer | ||
| from stac_fastapi.extensions.core.free_text import ( | ||
| FreeTextExtension as FreeTextExtensionBase, | ||
| ) | ||
| from typing_extensions import Annotated | ||
|
|
||
|
|
||
| class FreeTextExtensionPostRequest(BaseModel): | ||
| """Free-text Extension POST request model.""" | ||
|
|
||
| q: Annotated[ | ||
| Optional[List[str]], | ||
| PlainSerializer(lambda x: " OR ".join(x), return_type=str, when_used="json"), | ||
| ] = Field( | ||
| None, | ||
| description="Parameter to perform free-text queries against STAC metadata", | ||
| ) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Custom |
||
|
|
||
|
|
||
| class FreeTextExtension(FreeTextExtensionBase): | ||
| """FreeText Extension. | ||
| Override the POST request model to add custom serialization | ||
| """ | ||
|
|
||
| POST = FreeTextExtensionPostRequest | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| "type": "Polygon" | ||
| }, | ||
| "properties": { | ||
| "description": "Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.", | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. free-text for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not relevant for this PR, but this is why item-level free-text search feels funny to me ... IMO this info should live at the collection level only. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the case of a collection where each item contains relatively the same information at different place/time, Item-level free-text search is indeed redundant. However, imagine the case of a collection regrouping multiple "conceptual" items, such as many AI models described using MLM extension. In this case, each Item could contain different descriptions and keywords within the same collection, which makes the free-text search very relevant at item-level. |
||
| "datetime": "2020-02-12T12:30:22Z", | ||
| "landsat:scene_id": "LC82081612020043LGN00", | ||
| "landsat:row": "161", | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are no tests for the Also, there could be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe this PR is meant to implement free-text item search, just correct some stuff around free-text collection search. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
if you're passing a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Exactly, but it also enables free-text for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Good point about the Pydantic model. I'm not sure to understand the " I was able to activate it is my implementation (https://github.com/crim-ca/stac-app/pull/28/files). It should do the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I just mean that As mentioned in #263 (comment) it works only for Advanced because we don't do |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -365,6 +365,71 @@ async def test_collection_search_freetext( | |
| assert resp.json()["collections"][0]["id"] == load_test2_collection.id | ||
|
|
||
| resp = await app_client.get( | ||
| "/collections", | ||
| params={"q": "temperature,calibrated"}, | ||
| ) | ||
| assert resp.json()["numberReturned"] == 2 | ||
| assert resp.json()["numberMatched"] == 2 | ||
| assert len(resp.json()["collections"]) == 2 | ||
|
|
||
| resp = await app_client.get( | ||
| "/collections", | ||
| params={"q": "temperature,yo"}, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| ) | ||
| assert resp.json()["numberReturned"] == 1 | ||
| assert resp.json()["numberMatched"] == 1 | ||
| assert len(resp.json()["collections"]) == 1 | ||
| assert resp.json()["collections"][0]["id"] == load_test2_collection.id | ||
|
|
||
| resp = await app_client.get( | ||
| "/collections", | ||
| params={"q": "nosuchthing"}, | ||
| ) | ||
| assert len(resp.json()["collections"]) == 0 | ||
|
|
||
|
|
||
| @requires_pgstac_0_9_2 | ||
| @pytest.mark.asyncio | ||
| async def test_collection_search_freetext_advanced( | ||
| app_client_advanced_freetext, load_test_collection, load_test2_collection | ||
| ): | ||
| # free-text | ||
| resp = await app_client_advanced_freetext.get( | ||
| "/collections", | ||
| params={"q": "temperature"}, | ||
| ) | ||
| assert resp.json()["numberReturned"] == 1 | ||
| assert resp.json()["numberMatched"] == 1 | ||
| assert len(resp.json()["collections"]) == 1 | ||
| assert resp.json()["collections"][0]["id"] == load_test2_collection.id | ||
|
|
||
| resp = await app_client_advanced_freetext.get( | ||
| "/collections", | ||
| params={"q": "temperature,calibrated"}, | ||
| ) | ||
| assert resp.json()["numberReturned"] == 2 | ||
| assert resp.json()["numberMatched"] == 2 | ||
| assert len(resp.json()["collections"]) == 2 | ||
|
|
||
| resp = await app_client_advanced_freetext.get( | ||
| "/collections", | ||
| params={"q": "temperature,yo"}, | ||
| ) | ||
| assert resp.json()["numberReturned"] == 1 | ||
| assert resp.json()["numberMatched"] == 1 | ||
| assert len(resp.json()["collections"]) == 1 | ||
| assert resp.json()["collections"][0]["id"] == load_test2_collection.id | ||
|
|
||
| resp = await app_client_advanced_freetext.get( | ||
| "/collections", | ||
| params={"q": "temperature OR yo"}, | ||
| ) | ||
| assert resp.json()["numberReturned"] == 1 | ||
| assert resp.json()["numberMatched"] == 1 | ||
| assert len(resp.json()["collections"]) == 1 | ||
| assert resp.json()["collections"][0]["id"] == load_test2_collection.id | ||
|
|
||
| resp = await app_client_advanced_freetext.get( | ||
| "/collections", | ||
| params={"q": "nosuchthing"}, | ||
| ) | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this PR we changed and we now forward
kwargsto_clean_search_argsmethod.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens if
POST /searchwith{"q": 123}is submitted? Doesqmake it all the way to the DB and raises due to invalid types? There won't be any early API model validation of the parameter?