
Commit 88e3794

Cache expensive calls to LLMs (#5)
1 parent 6ddd055 commit 88e3794

6 files changed: +144 additions, −23 deletions

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ dependencies = [
     "pydantic-ai",
     "shapely",
     "aiohttp",
+    "cachetools>=5.0.0",
 ]

 [tool.setuptools]

stac_search/agents/collections_search.py

Lines changed: 17 additions & 3 deletions
@@ -12,6 +12,7 @@

 from pydantic_ai import Agent
 from stac_search.catalog_manager import CatalogManager
+from stac_search.cache import async_cached, embedding_cache, agent_cache


 logger = logging.getLogger(__name__)

@@ -59,6 +60,19 @@ class RankedCollections:
     )


+@async_cached(embedding_cache)
+async def _generate_query_embedding(catalog_manager, query: str):
+    """Generate cached embedding for query string"""
+    return await asyncio.to_thread(catalog_manager.model.encode, [query])
+
+
+@async_cached(agent_cache)
+async def _run_rerank_agent(user_prompt: str) -> RankedCollections:
+    """Run the rerank agent with caching"""
+    result = await rerank_agent.run(user_prompt)
+    return result.data
+
+
 async def collection_search(
     query: str,
     top_k: int = 5,

@@ -98,7 +112,7 @@ async def collection_search(
     logger.info(f"Model loading time: {load_model_time - start_time:.4f} seconds")

     # Generate query embedding
-    query_embedding = await asyncio.to_thread(catalog_manager.model.encode, [query])
+    query_embedding = await _generate_query_embedding(catalog_manager, query)

     # Search vector database
     results = await asyncio.to_thread(

@@ -122,9 +136,9 @@
         {collections_text}
     """

-    agent_result = await rerank_agent.run(user_prompt)
+    agent_result = await _run_rerank_agent(user_prompt)

-    return agent_result.data.results
+    return agent_result.results


 async def main():
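Taken together, the two helpers above let a repeated collection query skip both the SentenceTransformer encode call and the rerank LLM call for the length of the TTL. A minimal sketch of how the cached embedding helper could be checked, assuming a local CatalogManager with the default DATA_PATH; the query string and the timing print are illustrative only, and note that the manager instance is part of the cache key, so the hit only occurs when the same instance is reused:

import asyncio
import time

from stac_search.catalog_manager import CatalogManager
from stac_search.agents.collections_search import _generate_query_embedding

async def main():
    manager = CatalogManager()  # assumes the default DATA_PATH is available
    t0 = time.perf_counter()
    await _generate_query_embedding(manager, "global land cover")  # runs model.encode
    t1 = time.perf_counter()
    await _generate_query_embedding(manager, "global land cover")  # served from embedding_cache
    t2 = time.perf_counter()
    print(f"first call: {t1 - t0:.3f}s, cached call: {t2 - t1:.6f}s")

asyncio.run(main())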

stac_search/agents/items_search.py

Lines changed: 48 additions & 18 deletions
@@ -2,12 +2,11 @@
 import json
 import logging
 import os
-from dataclasses import dataclass
+from dataclasses import dataclass, asdict
 from pprint import pformat
 import time
 import asyncio
 from typing import List, Dict, Any, Union
-
 import aiohttp
 from pydantic_ai import Agent, RunContext
 from pystac_client import Client

@@ -17,6 +16,7 @@
     collection_search,
     CollectionWithExplanation,
 )
+from stac_search.cache import async_cached, agent_cache, geocoding_cache


 GEODINI_API = os.getenv("GEODINI_API", "https://geodini.k8s.labs.ds.io")

@@ -70,6 +70,12 @@ def search_items_agent_system_prompt():
     return f"The current date is {date.today()}"


+@async_cached(agent_cache)
+async def _run_search_items_agent(query: str, deps: dict) -> ItemSearchParams:
+    result = await search_items_agent.run(query, deps=Context(**deps))
+    return result.data
+
+
 @dataclass
 class CollectionQuery:
     query: str

@@ -108,15 +114,22 @@ class CollectionSearchResult:
     collections: List[CollectionWithExplanation]


+@async_cached(agent_cache)
+async def _run_collection_query_framing_agent(query: str) -> CollectionQuery:
+    result = await collection_query_framing_agent.run(query)
+    return result.data
+
+
+@async_cached(agent_cache)
 async def search_collections(
     query: str, catalog_url: str = None
 ) -> CollectionSearchResult | None:
     logger.info("Searching for relevant collections ...")
-    collection_query = await collection_query_framing_agent.run(query)
-    logger.info(f"Framed collection query: {collection_query.data.query}")
-    if collection_query.data.is_specific:
+    collection_query = await _run_collection_query_framing_agent(query)
+    logger.info(f"Framed collection query: {collection_query.query}")
+    if collection_query.is_specific:
         collections = await collection_search(
-            collection_query.data.query, catalog_url=catalog_url
+            collection_query.query, catalog_url=catalog_url
         )
         return CollectionSearchResult(collections=collections)
     else:

@@ -143,10 +156,15 @@ class GeocodingResult:
     )


+@async_cached(geocoding_cache)
+async def _run_geocoding_agent(query: str) -> GeocodingResult:
+    result = await geocoding_agent.run(query)
+    return result.data
+
+
 @search_items_agent.tool
 async def set_spatial_extent(ctx: RunContext[Context]) -> GeocodingResult:
-    result = await geocoding_agent.run(ctx.deps.query)
-    return result.data
+    return await _run_geocoding_agent(ctx.deps.query)


 @dataclass

@@ -170,10 +188,15 @@ def temporal_range_agent_system_prompt():
     return f"The current date is {date.today()}"


+@async_cached(agent_cache)
+async def _run_temporal_range_agent(query: str) -> TemporalRangeResult:
+    result = await temporal_range_agent.run(query)
+    return result.data
+
+
 @search_items_agent.tool
 async def set_temporal_range(ctx: RunContext[Context]) -> TemporalRangeResult:
-    result = await temporal_range_agent.run(ctx.deps.query)
-    return result.data
+    return await _run_temporal_range_agent(ctx.deps.query)


 class PropertyRef(BaseModel):

@@ -255,11 +278,18 @@ class FilterExpr(BaseModel):
     )


+@async_cached(agent_cache)
+async def _run_cql2_filter_agent(query: str) -> FilterExpr | None:
+    result = await cql2_filter_agent.run(query)
+    return result.data
+
+
 @search_items_agent.tool
 async def construct_cql2_filter(ctx: RunContext[Context]) -> FilterExpr | None:
-    return await cql2_filter_agent.run(ctx.deps.query)
+    return await _run_cql2_filter_agent(ctx.deps.query)


+@async_cached(geocoding_cache)
 async def get_polygon_from_geodini(location: str):
     geodini_api = f"{GEODINI_API}/search_complex"
     async with aiohttp.ClientSession() as session:

@@ -281,8 +311,8 @@ class ItemSearchResult:
 async def item_search(ctx: Context) -> ItemSearchResult:
     start_time = time.time()
     # formulate the query to be used for the search
-    results = await search_items_agent.run(
-        f"Find items for the query: {ctx.query}", deps=ctx
+    results = await _run_search_items_agent(
+        query=f"Find items for the query: {ctx.query}", deps=asdict(ctx)
     )
     query_formulation_time = time.time()
     logger.info(

@@ -330,8 +360,8 @@ async def item_search(ctx: Context) -> ItemSearchResult:
     params = {
         "max_items": 20,
         "collections": collections_to_search,
-        "datetime": results.data.datetime,
-        "filter": results.data.filter,
+        "datetime": results.datetime,
+        "filter": results.filter,
     }

     logger.info(f"Searching with params: {params}")

@@ -340,12 +370,12 @@
         f"Params formulation time: {params_formulation_time - query_formulation_time} seconds"
     )

-    polygon = await get_polygon_from_geodini(results.data.location)
+    polygon = await get_polygon_from_geodini(results.location)
     if polygon:
-        logger.info(f"Found polygon for {results.data.location}")
+        logger.info(f"Found polygon for {results.location}")
         params["intersects"] = polygon
     else:
-        explanation += f"\n\n No polygon found for {results.data.location}. "
+        explanation += f"\n\n No polygon found for {results.location}. "
         return ItemSearchResult(
             items=None, search_params=params, aoi=None, explanation=explanation
         )
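The call-site change in item_search, passing deps=asdict(ctx) instead of the Context instance itself, is what makes the agent result cacheable across requests: _freeze turns a plain dict into a hashable frozenset, whereas a dataclass instance falls through to the default branch and is keyed by object identity, so two equivalent requests would never share an entry. A small sketch under that assumption, using a simplified stand-in Context (not the full definition from items_search.py) and an illustrative prompt:

from dataclasses import dataclass, asdict

from cachetools.keys import hashkey

from stac_search.cache import _freeze

@dataclass
class Context:  # simplified stand-in, not the full Context from items_search.py
    query: str
    catalog_url: str | None = None

a = Context(query="flooding in Kerala, August 2018")
b = Context(query="flooding in Kerala, August 2018")

key_a = hashkey("_run_search_items_agent", "prompt", _freeze(asdict(a)))
key_b = hashkey("_run_search_items_agent", "prompt", _freeze(asdict(b)))
assert key_a == key_b  # equal field values -> identical cache key, so the second request is a hit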

stac_search/api.py

Lines changed: 6 additions & 1 deletion
@@ -2,15 +2,19 @@
 FastAPI server for STAC Natural Query
 """

+import logging
+from typing import Optional
+
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from typing import Optional
 import uvicorn

 from stac_search.agents.collections_search import collection_search
 from stac_search.agents.items_search import item_search, Context as ItemSearchContext

+logger = logging.getLogger(__name__)
+
 # Initialize FastAPI app
 app = FastAPI(
     title="STAC Natural Query API",

@@ -65,6 +69,7 @@ async def search_items(request: STACItemsRequest):
         results = await item_search(ctx)
         return {"results": results}
     except Exception as e:
+        logger.exception(e)
         raise HTTPException(status_code=500, detail=str(e))

stac_search/cache.py

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+"""
+Caching module for STAC Natural Query - handles various caching strategies
+"""
+
+import asyncio
+import logging
+from functools import wraps
+
+from cachetools.keys import hashkey
+from cachetools import TTLCache
+
+logger = logging.getLogger(__name__)
+
+
+geocoding_cache = TTLCache(maxsize=100, ttl=86400)  # 24 hours - locations don't change
+embedding_cache = TTLCache(maxsize=100, ttl=86400)  # 24 hours - embeddings are stable
+agent_cache = TTLCache(maxsize=100, ttl=3600)  # 1 hour - agent results cache
+
+
+def _freeze(obj):
+    if isinstance(obj, dict):
+        # sort items to make order deterministic
+        return frozenset((k, _freeze(v)) for k, v in sorted(obj.items()))
+    if isinstance(obj, (list, tuple)):
+        return tuple(_freeze(v) for v in obj)
+    if isinstance(obj, set):
+        return frozenset(_freeze(v) for v in obj)
+    return obj  # assume primitive (int, str, etc.)
+
+
+def async_cached(cache):
+    lock = asyncio.Lock()
+
+    def decorator(fn):
+        @wraps(fn)
+        async def wrapper(*args, **kwargs):
+            # freeze each arg/kwarg
+            fargs = tuple(_freeze(a) for a in args)
+            fkwargs = {k: _freeze(v) for k, v in kwargs.items()}
+            key = hashkey(f"{fn.__name__}", *fargs, **fkwargs)
+            if key in cache:
+                return cache[key]
+            async with lock:
+                if key in cache:
+                    return cache[key]
+                result = await fn(*args, **kwargs)
+                cache[key] = result
+                return result
+
+        return wrapper
+
+    return decorator
+
+
+def clear_all_caches():
+    """
+    Clear all caches
+    """
+    logger.info("Clearing all caches")
+    geocoding_cache.clear()
+    embedding_cache.clear()
+    agent_cache.clear()
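The decorator uses double-checked locking: a lock-free read first, then the lock is taken only on a miss, so concurrent identical calls do not all hit the LLM. A minimal usage sketch assuming only the new module; slow_lookup, its arguments, and the 60-second TTL are hypothetical:

import asyncio

from cachetools import TTLCache

from stac_search.cache import async_cached

demo_cache = TTLCache(maxsize=32, ttl=60)  # hypothetical: small cache, 60-second TTL

@async_cached(demo_cache)
async def slow_lookup(query: str, options: dict) -> str:
    # stand-in for an expensive LLM or geocoding call
    await asyncio.sleep(1)
    return f"result for {query}"

async def main():
    # dict arguments are frozen by _freeze, so both calls hash to the same key
    first = await slow_lookup("sentinel-2 over Nairobi", {"top_k": 5})
    second = await slow_lookup("sentinel-2 over Nairobi", {"top_k": 5})  # cache hit, no sleep
    assert first == second

asyncio.run(main())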

stac_search/catalog_manager.py

Lines changed: 10 additions & 1 deletion
@@ -11,11 +11,15 @@
 from pystac_client import Client
 from sentence_transformers import SentenceTransformer

+from stac_search.cache import async_cached, embedding_cache
+
+
 logger = logging.getLogger(__name__)

 # Constants
 MODEL_NAME = "all-MiniLM-L6-v2"
 DATA_PATH = os.environ.get("DATA_PATH", "data/chromadb")
+MODEL = SentenceTransformer(MODEL_NAME)


 class CatalogManager:

@@ -24,11 +28,15 @@ class CatalogManager:
     def __init__(self, data_path: str = DATA_PATH, model_name: str = MODEL_NAME):
         self.data_path = data_path
         self.model_name = model_name
-        self.model = SentenceTransformer(model_name)
         self.client = chromadb.PersistentClient(path=data_path)

+    @property
+    def model(self):
+        return MODEL
+
     def _get_catalog_name(self, catalog_url: str) -> str:
         """Generate a unique catalog name from URL"""
+        logger.info(f"Generating catalog name for {catalog_url}")
         # Create a hash of the URL for consistent naming
         url_hash = hashlib.md5(catalog_url.encode()).hexdigest()[:8]
         # Clean URL for readability

@@ -81,6 +89,7 @@ def _fetch():
                 logger.error(f"Error fetching collections: {e}")
                 return []

+    @async_cached(embedding_cache)
     async def generate_embeddings(self, collections: list) -> list:
         """Generate embeddings for each collection (title + description)"""
         texts = []
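Moving the SentenceTransformer to a module-level constant means the model weights load once per process, and every CatalogManager instance shares them through the read-only model property. A trimmed sketch of that pattern (stand-in class, not the full implementation):

from sentence_transformers import SentenceTransformer

MODEL_NAME = "all-MiniLM-L6-v2"
MODEL = SentenceTransformer(MODEL_NAME)  # weights load once, at import time

class CatalogManager:  # trimmed stand-in for the real class
    @property
    def model(self) -> SentenceTransformer:
        return MODEL  # every instance returns the same shared model

# both instances see the same object, so the load cost is paid once per process
assert CatalogManager().model is CatalogManager().model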
