Skip to content

Commit dd0a439

Browse files
committed
Make the LLM model and provider configurable
1 parent 5e3aaf3 commit dd0a439

File tree

5 files changed

+69
-36
lines changed

5 files changed

+69
-36
lines changed

.env.example

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1+
# For OpenAI
12
OPENAI_API_KEY="your-api-key"
3+
SMALL_MODEL_NAME="openai:gpt-4.1-mini"
4+
5+
# For Mistral
6+
# MISTRAL_API_KEY="your-mistral-key"
7+
# SMALL_MODEL_NAME="mistral:mistral-medium-latest"
8+
29
GEODINI_API="https://geodini.k8s.labs.ds.io"
10+
311
STAC_CATALOG_NAME="planetarycomputer"
4-
STAC_CATALOG_URL="https://planetarycomputer.microsoft.com/api/stac/v1"
12+
STAC_CATALOG_URL="https://planetarycomputer.microsoft.com/api/stac/v1"

README.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,20 @@ STAC Semantic Search allows users to find satellite imagery and geospatial data
1515
- **Visual Results**: Interactive maps showing search results and areas of interest
1616

1717

18+
## 🚧 Early Prototype
19+
20+
This project is still in its very early stages and under active development. You might encounter some quirks or rough edges, but please give it a try and let us know what you think!
21+
22+
1823
## 🚀 Getting Started
1924

2025
### Prerequisites
2126

22-
- Python 3.12+
23-
- OpenAI API key
24-
- Docker (optional)
27+
- An API key for OpenAI, Anthropic, Mistral, or any other LLM provider [supported by Pydantic AI](https://ai.pydantic.dev/models/)
28+
- Docker and Docker Compose
29+
- A STAC catalog to search through
30+
- A [Geodini instance](https://github.com/sunu/geodini) to resolve geolocation queries
31+
2532

2633
### Installation
2734

@@ -55,7 +62,7 @@ STAC Semantic Search allows users to find satellite imagery and geospatial data
5562
```bash
5663
curl -X POST "http://localhost:8000/search" \
5764
-H "Content-Type: application/json" \
58-
-d '{"query": "Sentinel-2 imagery over France"}'
65+
-d '{"query": "Sentinel-2 imagery"}'
5966
```
6067

6168
**Search Items**

helm-chart/Chart.yaml

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,9 @@
11
apiVersion: v2
2-
name: stac-search
3-
description: A Helm chart for STAC Semantic Search application
2+
name: stac-semantic-search
3+
description: A Helm chart for STAC Semantic Search API and Streamlit UI
44
type: application
55
version: 0.1.0
66
appVersion: "0.1.0"
7-
keywords:
8-
- stac
9-
- search
10-
- semantic
11-
- geospatial
12-
- satellite
13-
home: https://github.com/your-org/stac-semantic-search
7+
home: https://github.com/developmentseed/stac-semantic-search
148
sources:
15-
- https://github.com/your-org/stac-semantic-search
16-
maintainers:
17-
- name: Your Name
18-
9+
- https://github.com/developmentseed/stac-semantic-search

stac_search/agents/collections_search.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919
# Constants
2020
MODEL_NAME = "all-MiniLM-L6-v2"
2121
DATA_PATH = os.environ.get("DATA_PATH", "data/chromadb")
22-
OPENAI_MODEL = "gpt-4o-mini"
2322

2423
STAC_CATALOG_NAME = os.getenv("STAC_CATALOG_NAME", "planetarycomputer")
2524
STAC_COLLECTIONS_URL = os.getenv(
2625
"STAC_COLLECTIONS_URL", "https://planetarycomputer.microsoft.com/api/stac/v1"
2726
)
28-
29-
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)
27+
SMALL_MODEL_NAME = os.getenv("SMALL_MODEL_NAME", "openai:gpt-4.1-mini")
3028

3129

3230
@dataclass
@@ -49,7 +47,7 @@ class RankedCollections:
4947

5048

5149
rerank_agent = Agent(
52-
"openai:gpt-4o-mini",
50+
SMALL_MODEL_NAME,
5351
result_type=RankedCollections,
5452
deps_type=RerankContext,
5553
system_prompt="""

stac_search/agents/items_search.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import os
44
from dataclasses import dataclass
55
from pprint import pformat
6-
from typing import List, Dict, Any
6+
from typing import List, Dict, Any, Union
77

88
import requests
99
from pydantic_ai import Agent, RunContext
1010
from pystac_client import Client
11+
from pydantic import BaseModel
1112

1213
from stac_search.agents.collections_search import (
1314
collection_search,
@@ -16,6 +17,10 @@
1617

1718

1819
GEODINI_API = os.getenv("GEODINI_API", "https://geodini.k8s.labs.ds.io")
20+
SMALL_MODEL_NAME = os.getenv("SMALL_MODEL_NAME", "openai:gpt-4.1-mini")
21+
STAC_CATALOG_URL = os.getenv(
22+
"STAC_CATALOG_URL", "https://planetarycomputer.microsoft.com/api/stac/v1"
23+
)
1924

2025
logger = logging.getLogger(__name__)
2126

@@ -38,7 +43,7 @@ class ItemSearchParams:
3843

3944

4045
search_items_agent = Agent(
41-
"openai:gpt-4o-mini",
46+
SMALL_MODEL_NAME,
4247
result_type=ItemSearchParams,
4348
deps_type=Context,
4449
system_prompt=f"""
@@ -60,7 +65,7 @@ class CollectionQuery:
6065

6166

6267
collection_query_framing_agent = Agent(
63-
"openai:gpt-4o-mini",
68+
SMALL_MODEL_NAME,
6469
result_type=CollectionQuery,
6570
system_prompt="""
6671
The user query is searching for relevant satellite imagery.
@@ -108,7 +113,7 @@ class GeocodingResult:
108113

109114

110115
geocoding_agent = Agent(
111-
"openai:gpt-4o-mini",
116+
SMALL_MODEL_NAME,
112117
result_type=GeocodingResult,
113118
system_prompt="""
114119
For the given query, if it contains a location, return location query to be used to search for the location.
@@ -134,7 +139,7 @@ class TemporalRangeResult:
134139

135140

136141
temporal_range_agent = Agent(
137-
"openai:gpt-4o-mini",
142+
SMALL_MODEL_NAME,
138143
result_type=TemporalRangeResult,
139144
system_prompt="""
140145
For the given query, if it contains a temporal range, return the start date and end date. If it doesn't contain a temporal range, return None.
@@ -151,17 +156,42 @@ async def set_temporal_range(ctx: RunContext[Context]) -> TemporalRangeResult:
151156
return result.data
152157

153158

154-
@dataclass
155-
class Cql2Filter:
156-
"""Parameters to be used to query the STAC API"""
159+
class PropertyRef(BaseModel):
160+
property: str
161+
162+
163+
class Geometry(BaseModel):
164+
type: str
165+
coordinates: Any
166+
167+
168+
class GeometryLiteral(BaseModel):
169+
geometry: Geometry
170+
157171

172+
class PeriodLiteral(BaseModel):
173+
period: List[str]
174+
175+
176+
FilterArg = Union[
177+
"FilterExpr", PropertyRef, GeometryLiteral, PeriodLiteral, int, float, str
178+
]
179+
180+
181+
class FilterExpr(BaseModel):
158182
op: str
159-
args: List[Any]
183+
args: List[FilterArg]
184+
185+
class Config:
186+
arbitrary_types_allowed = True
187+
188+
189+
FilterExpr.update_forward_refs()
160190

161191

162192
cql2_filter_agent = Agent(
163-
"openai:gpt-4o-mini",
164-
result_type=Cql2Filter,
193+
SMALL_MODEL_NAME,
194+
result_type=FilterExpr,
165195
system_prompt="""
166196
For the given query, construct a CQL2 filter to be used to query the STAC API only if required.
167197
Return None if the query doesn't require a CQL2 filter or if you can't determine the filter or if the property is not supported.
@@ -200,7 +230,7 @@ class Cql2Filter:
200230

201231

202232
@search_items_agent.tool
203-
async def construct_cql2_filter(ctx: RunContext[Context]) -> Cql2Filter:
233+
async def construct_cql2_filter(ctx: RunContext[Context]) -> FilterExpr | None:
204234
return await cql2_filter_agent.run(ctx.deps.query)
205235

206236

@@ -259,8 +289,7 @@ async def item_search(ctx: Context) -> ItemSearchResult:
259289
collections_to_search = default_target_collections
260290

261291
# Actually perform the search
262-
CATALOG_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
263-
client = Client.open(CATALOG_URL)
292+
client = Client.open(STAC_CATALOG_URL)
264293
params = {
265294
"max_items": 20,
266295
# looks like collections is required by Planetary Computer STAC API

0 commit comments

Comments
 (0)