Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Fixed

- fix root-path handling when setting via env var or on app instance
- Allow `q` parameter to be a `str` not a `list[str]` for Advanced Free-Text extension

### Changed

Expand Down
3 changes: 1 addition & 2 deletions stac_fastapi/pgstac/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
CollectionSearchExtension,
CollectionSearchFilterExtension,
FieldsExtension,
FreeTextExtension,
ItemCollectionFilterExtension,
OffsetPaginationExtension,
SearchFilterExtension,
Expand All @@ -42,7 +41,7 @@
from stac_fastapi.pgstac.config import Settings
from stac_fastapi.pgstac.core import CoreCrudClient, health_check
from stac_fastapi.pgstac.db import close_db_connection, connect_to_db
from stac_fastapi.pgstac.extensions import QueryExtension
from stac_fastapi.pgstac.extensions import FreeTextExtension, QueryExtension
from stac_fastapi.pgstac.extensions.filter import FiltersClient
from stac_fastapi.pgstac.transactions import BulkTransactionsClient, TransactionsClient
from stac_fastapi.pgstac.types.search import PgstacSearch
Expand Down
42 changes: 31 additions & 11 deletions stac_fastapi/pgstac/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ async def all_collections( # noqa: C901
sortby: Optional[str] = None,
filter_expr: Optional[str] = None,
filter_lang: Optional[str] = None,
q: Optional[List[str]] = None,
**kwargs,
**kwargs: Any,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this PR we changed and we now forward kwargs to _clean_search_args method.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if POST /search with {"q": 123} is submitted? Does q make it all the way to the DB and raises due to invalid types? There won't be any early API model validation of the parameter?

) -> Collections:
"""Cross catalog search (GET).

Expand Down Expand Up @@ -86,9 +85,15 @@ async def all_collections( # noqa: C901
sortby=sortby,
filter_query=filter_expr,
filter_lang=filter_lang,
q=q,
**kwargs,
)

# NOTE: `FreeTextExtension` - pgstac will only accept `str` so we need to
# join the list[str] with ` OR `
# ref: https://github.com/stac-utils/stac-fastapi-pgstac/pull/263
if q := clean_args.pop("q", None):
clean_args["q"] = " OR ".join(q) if isinstance(q, list) else q
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need custom code to handle list[str] passed by collection-search Free-Text extension as pgstac will only accept str

Note: we don't need this in items search because we will use pydantic serialization


async with request.app.state.get_connection(request, "r") as conn:
q, p = render(
"""
Expand Down Expand Up @@ -157,7 +162,10 @@ async def all_collections( # noqa: C901
)

async def get_collection(
self, collection_id: str, request: Request, **kwargs
self,
collection_id: str,
request: Request,
**kwargs: Any,
) -> Collection:
"""Get collection by id.

Expand Down Expand Up @@ -202,7 +210,9 @@ async def get_collection(
return Collection(**collection)

async def _get_base_item(
self, collection_id: str, request: Request
self,
collection_id: str,
request: Request,
) -> Dict[str, Any]:
"""Get the base item of a collection for use in rehydrating full item collection properties.

Expand Down Expand Up @@ -359,7 +369,7 @@ async def item_collection(
filter_expr: Optional[str] = None,
filter_lang: Optional[str] = None,
token: Optional[str] = None,
**kwargs,
**kwargs: Any,
) -> ItemCollection:
"""Get all items from a specific collection.

Expand Down Expand Up @@ -391,6 +401,7 @@ async def item_collection(
filter_lang=filter_lang,
fields=fields,
sortby=sortby,
**kwargs,
)

try:
Expand All @@ -417,7 +428,11 @@ async def item_collection(
return ItemCollection(**item_collection)

async def get_item(
self, item_id: str, collection_id: str, request: Request, **kwargs
self,
item_id: str,
collection_id: str,
request: Request,
**kwargs: Any,
) -> Item:
"""Get item by id.

Expand Down Expand Up @@ -445,7 +460,10 @@ async def get_item(
return Item(**item_collection["features"][0])

async def post_search(
self, search_request: PgstacSearch, request: Request, **kwargs
self,
search_request: PgstacSearch,
request: Request,
**kwargs: Any,
) -> ItemCollection:
"""Cross catalog search (POST).

Expand Down Expand Up @@ -489,7 +507,7 @@ async def get_search(
filter_expr: Optional[str] = None,
filter_lang: Optional[str] = None,
token: Optional[str] = None,
**kwargs,
**kwargs: Any,
) -> ItemCollection:
"""Cross catalog search (GET).

Expand All @@ -516,6 +534,7 @@ async def get_search(
sortby=sortby,
filter_query=filter_expr,
filter_lang=filter_lang,
**kwargs,
)

try:
Expand Down Expand Up @@ -550,7 +569,8 @@ def _clean_search_args( # noqa: C901
sortby: Optional[str] = None,
filter_query: Optional[str] = None,
filter_lang: Optional[str] = None,
q: Optional[List[str]] = None,
q: Optional[Union[str, List[str]]] = None,
**kwargs: Any,
) -> Dict[str, Any]:
"""Clean up search arguments to match format expected by pgstac"""
if filter_query:
Expand Down Expand Up @@ -596,7 +616,7 @@ def _clean_search_args( # noqa: C901
base_args["fields"] = {"include": includes, "exclude": excludes}

if q:
base_args["q"] = " OR ".join(q)
base_args["q"] = q

# Remove None values from dict
clean = {}
Expand Down
3 changes: 2 additions & 1 deletion stac_fastapi/pgstac/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""pgstac extension customisations."""

from .filter import FiltersClient
from .free_text import FreeTextExtension
from .query import QueryExtension

__all__ = ["QueryExtension", "FiltersClient"]
__all__ = ["QueryExtension", "FiltersClient", "FreeTextExtension"]
31 changes: 31 additions & 0 deletions stac_fastapi/pgstac/extensions/free_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Free-Text model for PgSTAC."""

from typing import List, Optional

from pydantic import BaseModel, Field
from pydantic.functional_serializers import PlainSerializer
from stac_fastapi.extensions.core.free_text import (
FreeTextExtension as FreeTextExtensionBase,
)
from typing_extensions import Annotated


class FreeTextExtensionPostRequest(BaseModel):
"""Free-text Extension POST request model."""

q: Annotated[
Optional[List[str]],
PlainSerializer(lambda x: " OR ".join(x), return_type=str, when_used="json"),
] = Field(
None,
description="Parameter to perform free-text queries against STAC metadata",
)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Custom FreeTextExtensionPostRequest model which will handle JSON serialization, transforming list[str] to str



class FreeTextExtension(FreeTextExtensionBase):
"""FreeText Extension.
Override the POST request model to add custom serialization
"""

POST = FreeTextExtensionPostRequest
64 changes: 58 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
import os
import time
from typing import Callable, Dict
from urllib.parse import quote_plus as quote
from urllib.parse import urljoin
Expand All @@ -26,7 +25,7 @@
CollectionSearchExtension,
CollectionSearchFilterExtension,
FieldsExtension,
FreeTextExtension,
FreeTextAdvancedExtension,
ItemCollectionFilterExtension,
OffsetPaginationExtension,
SearchFilterExtension,
Expand All @@ -44,7 +43,7 @@
from stac_fastapi.pgstac.config import PostgresSettings, Settings
from stac_fastapi.pgstac.core import CoreCrudClient, health_check
from stac_fastapi.pgstac.db import close_db_connection, connect_to_db
from stac_fastapi.pgstac.extensions import QueryExtension
from stac_fastapi.pgstac.extensions import FreeTextExtension, QueryExtension
from stac_fastapi.pgstac.extensions.filter import FiltersClient
from stac_fastapi.pgstac.transactions import BulkTransactionsClient, TransactionsClient
from stac_fastapi.pgstac.types.search import PgstacSearch
Expand Down Expand Up @@ -139,6 +138,7 @@ def api_client(request):
FieldsExtension(),
SearchFilterExtension(client=FiltersClient()),
TokenPaginationExtension(),
FreeTextExtension(), # not recommended by PgSTAC
]
application_extensions.extend(search_extensions)

Expand Down Expand Up @@ -167,6 +167,7 @@ def api_client(request):
FieldsExtension(conformance_classes=[FieldsConformanceClasses.ITEMS]),
ItemCollectionFilterExtension(client=FiltersClient()),
TokenPaginationExtension(),
FreeTextExtension(), # not recommended by PgSTAC
]
application_extensions.extend(item_collection_extensions)

Expand Down Expand Up @@ -207,7 +208,6 @@ async def app(api_client, database):
pgdatabase=database.dbname,
)
logger.info("Creating app Fixture")
time.time()
app = api_client.app
await connect_to_db(
app,
Expand Down Expand Up @@ -314,7 +314,6 @@ async def app_no_ext(database):
pgdatabase=database.dbname,
)
logger.info("Creating app Fixture")
time.time()
await connect_to_db(
api_client_no_ext.app,
postgres_settings=postgres_settings,
Expand Down Expand Up @@ -354,7 +353,6 @@ async def app_no_transaction(database):
pgdatabase=database.dbname,
)
logger.info("Creating app Fixture")
time.time()
await connect_to_db(
api.app,
postgres_settings=postgres_settings,
Expand Down Expand Up @@ -402,3 +400,57 @@ async def default_client(default_app):
transport=ASGITransport(app=default_app), base_url="http://test"
) as c:
yield c


@pytest.fixture(scope="function")
async def app_advanced_freetext(database):
"""Default stac-fastapi-pgstac application without only the transaction extensions."""
api_settings = Settings(testing=True)

application_extensions = [
TransactionExtension(client=TransactionsClient(), settings=api_settings)
]

collection_extensions = [
FreeTextAdvancedExtension(),
OffsetPaginationExtension(),
]
collection_search_extension = CollectionSearchExtension.from_extensions(
collection_extensions
)
application_extensions.append(collection_search_extension)

app = StacApi(
settings=api_settings,
extensions=application_extensions,
client=CoreCrudClient(),
health_check=health_check,
collections_get_request_model=collection_search_extension.GET,
)

postgres_settings = PostgresSettings(
pguser=database.user,
pgpassword=database.password,
pghost=database.host,
pgport=database.port,
pgdatabase=database.dbname,
)
logger.info("Creating app Fixture")
await connect_to_db(
app.app,
postgres_settings=postgres_settings,
add_write_connection_pool=True,
)
yield app.app
await close_db_connection(app.app)

logger.info("Closed Pools.")


@pytest.fixture(scope="function")
async def app_client_advanced_freetext(app_advanced_freetext):
logger.info("creating app_client")
async with AsyncClient(
transport=ASGITransport(app=app_advanced_freetext), base_url="http://test"
) as c:
yield c
1 change: 1 addition & 0 deletions tests/data/test_item.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"type": "Polygon"
},
"properties": {
"description": "Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

free-text for items will only work within properties (title, description keywords)

https://github.com/stac-utils/pgstac/blob/45ac2478b58946529872ec3feed0ee0c838c4742/src/pgstac/sql/004_search.sql#L225-L227

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not relevant for this PR, but this is why item-level free-text search feels funny to me ... IMO this info should live at the collection level only.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case of a collection where each item contains relatively the same information at different place/time, Item-level free-text search is indeed redundant. However, imagine the case of a collection regrouping multiple "conceptual" items, such as many AI models described using MLM extension. In this case, each Item could contain different descriptions and keywords within the same collection, which makes the free-text search very relevant at item-level.

"datetime": "2020-02-12T12:30:22Z",
"landsat:scene_id": "LC82081612020043LGN00",
"landsat:row": "161",
Expand Down
65 changes: 65 additions & 0 deletions tests/resources/test_collection.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no tests for the /search?q= and /collections/{col}/items?q=... cases?

Also, there could be POST /search with {"q": "advanced AND search"} or {"q": ["basic", "search"]}.

Copy link
Member

@gadomski gadomski Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe this PR is meant to implement free-text item search, just correct some stuff around free-text collection search.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function self._clean_search_args calls are updated with the **kwargs, so q will now trickle down within these calls as well (which is good), and should be considered (but could be in a separate PR though not to block this one).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

q won't be in /search because we don't usually use the free-text extension for items

if you're passing a dict, pydantic should raise a validation https://github.com/stac-utils/stac-fastapi/blob/fa42985255fad0bab7dbe3aadbf1f74cb1635f3a/stac_fastapi/extensions/stac_fastapi/extensions/core/free_text/request.py#L37-L43

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe this PR is meant to implement free-text item search, just correct some stuff around free-text collection search.

Exactly, but it also enables free-text for items by using kwargs, as for other unknown extension people would want to implement. Now they would have to just pass a custom _clean_search_args to support any kind of input passed through kwargs

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Good point about the Pydantic model.

I'm not sure to understand the "q won't be in /search".
Isn't it available if the conformance is applied?
https://github.com/stac-utils/stac-fastapi/blob/fa42985255fad0bab7dbe3aadbf1f74cb1635f3a/stac_fastapi/extensions/stac_fastapi/extensions/core/free_text/free_text.py#L27-L40

I was able to activate it is my implementation (https://github.com/crim-ca/stac-app/pull/28/files). It should do the title/description/keywords free-text search across all collections' items, as if filter or query was used, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand the "q won't be in /search".

I just mean that q parameter will ONLY be available if enabled at the application level.

As mentioned in #263 (comment) it works only for Advanced because we don't do str -> list -> str transformation but keep the values as str

Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,71 @@ async def test_collection_search_freetext(
assert resp.json()["collections"][0]["id"] == load_test2_collection.id

resp = await app_client.get(
"/collections",
params={"q": "temperature,calibrated"},
)
assert resp.json()["numberReturned"] == 2
assert resp.json()["numberMatched"] == 2
assert len(resp.json()["collections"]) == 2

resp = await app_client.get(
"/collections",
params={"q": "temperature,yo"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

)
assert resp.json()["numberReturned"] == 1
assert resp.json()["numberMatched"] == 1
assert len(resp.json()["collections"]) == 1
assert resp.json()["collections"][0]["id"] == load_test2_collection.id

resp = await app_client.get(
"/collections",
params={"q": "nosuchthing"},
)
assert len(resp.json()["collections"]) == 0


@requires_pgstac_0_9_2
@pytest.mark.asyncio
async def test_collection_search_freetext_advanced(
app_client_advanced_freetext, load_test_collection, load_test2_collection
):
# free-text
resp = await app_client_advanced_freetext.get(
"/collections",
params={"q": "temperature"},
)
assert resp.json()["numberReturned"] == 1
assert resp.json()["numberMatched"] == 1
assert len(resp.json()["collections"]) == 1
assert resp.json()["collections"][0]["id"] == load_test2_collection.id

resp = await app_client_advanced_freetext.get(
"/collections",
params={"q": "temperature,calibrated"},
)
assert resp.json()["numberReturned"] == 2
assert resp.json()["numberMatched"] == 2
assert len(resp.json()["collections"]) == 2

resp = await app_client_advanced_freetext.get(
"/collections",
params={"q": "temperature,yo"},
)
assert resp.json()["numberReturned"] == 1
assert resp.json()["numberMatched"] == 1
assert len(resp.json()["collections"]) == 1
assert resp.json()["collections"][0]["id"] == load_test2_collection.id

resp = await app_client_advanced_freetext.get(
"/collections",
params={"q": "temperature OR yo"},
)
assert resp.json()["numberReturned"] == 1
assert resp.json()["numberMatched"] == 1
assert len(resp.json()["collections"]) == 1
assert resp.json()["collections"][0]["id"] == load_test2_collection.id

resp = await app_client_advanced_freetext.get(
"/collections",
params={"q": "nosuchthing"},
)
Expand Down
Loading
Loading