Catalog Manager for STAC Natural Query - handles dynamic catalog loading and management
"""
44
5+ import asyncio
56import hashlib
67import logging
78import os
@@ -52,38 +53,46 @@ def catalog_exists(self, catalog_url: str) -> bool:
5253 logger .error (f"Error checking catalog existence: { e } " )
5354 return False
5455
async def validate_catalog_url(self, catalog_url: str) -> bool:
    """Validate that the catalog URL is accessible and is a valid STAC catalog.

    The blocking pystac-client calls are executed in a worker thread through
    ``asyncio.to_thread`` so that awaiting this coroutine does not block the
    event loop.

    Args:
        catalog_url: URL handed to ``Client.open``.

    Returns:
        True if the catalog opens and yields at least one collection.
        False if it yields no collections or if any exception is raised
        while opening/searching (the error is logged).
    """

    def _has_collection() -> bool:
        stac_client = Client.open(catalog_url)
        collections = stac_client.collection_search().collections()
        # A single collection is enough to prove the catalog is valid, so
        # stop at the first result instead of materializing the full listing.
        return next(iter(collections), None) is not None

    try:
        return await asyncio.to_thread(_has_collection)
    except Exception as e:
        logger.error(f"Invalid catalog URL {catalog_url}: {e}")
        return False
6570
async def fetch_collections(self, stac_client: Client) -> list:
    """Fetch STAC collections using pystac-client.

    The collection search is iterated inside a worker thread
    (``asyncio.to_thread``) because the client calls are blocking.

    Args:
        stac_client: An already opened client exposing
            ``collection_search().collections()``.

    Returns:
        A list with every collection produced by the search, or an empty
        list if the search raises (the error is logged, not propagated).
    """
    try:
        # list() must run inside the thread: iterating the search result is
        # what actually performs the blocking work.
        return await asyncio.to_thread(
            lambda: list(stac_client.collection_search().collections())
        )
    except Exception as e:
        logger.error(f"Error fetching collections: {e}")
        return []
7483
async def generate_embeddings(self, collections: list) -> list:
    """Generate embeddings for each collection (title + description).

    Each collection contributes one text of the form ``"<title> <description>"``;
    a missing or falsy ``title``/``description`` attribute is treated as an
    empty string. The texts are encoded with ``self.model.encode`` in a
    worker thread via ``asyncio.to_thread``.

    Args:
        collections: Objects that may expose ``title`` and ``description``
            attributes.

    Returns:
        Whatever ``self.model.encode`` returns for the list of texts, in the
        same order as ``collections``.
        NOTE(review): annotated as ``list``, but the concrete type depends on
        the model - confirm against the embedding library in use.
    """

    def _text(collection) -> str:
        # `or ""` also covers attributes that exist but are None.
        title = getattr(collection, "title", "") or ""
        description = getattr(collection, "description", "") or ""
        return f"{title} {description}"

    texts = [_text(collection) for collection in collections]
    return await asyncio.to_thread(self.model.encode, texts)
8594
86- def store_in_vector_db (self , collections : list , chroma_collection ) -> None :
95+ async def store_in_vector_db (self , collections : list , chroma_collection ) -> None :
8796 """Store embeddings in ChromaDB"""
8897 if not collections :
8998 logger .warning ("No collections to store" )
@@ -98,9 +107,10 @@ def store_in_vector_db(self, collections: list, chroma_collection) -> None:
98107 }
99108 metadatas .append (metadata )
100109
101- embeddings = self .generate_embeddings (collections )
110+ embeddings = await self .generate_embeddings (collections )
102111
103- chroma_collection .add (
112+ await asyncio .to_thread (
113+ chroma_collection .add ,
104114 ids = [str (i ) for i in range (len (collections ))],
105115 embeddings = embeddings ,
106116 metadatas = metadatas ,
@@ -110,7 +120,7 @@ async def load_catalog(self, catalog_url: str) -> Dict[str, Any]:
110120 """Load and index a catalog if it doesn't exist"""
111121 try :
112122 # Validate catalog URL first
113- if not self .validate_catalog_url (catalog_url ):
123+ if not await self .validate_catalog_url (catalog_url ):
114124 return {
115125 "success" : False ,
116126 "error" : f"Invalid or inaccessible catalog URL: { catalog_url } " ,
@@ -127,8 +137,8 @@ async def load_catalog(self, catalog_url: str) -> Dict[str, Any]:
127137
128138 # Load the catalog
129139 logger .info (f"Loading catalog from { catalog_url } " )
130- stac_client = Client .open ( catalog_url )
131- collections = self .fetch_collections (stac_client )
140+ stac_client = await asyncio . to_thread ( Client .open , catalog_url )
141+ collections = await self .fetch_collections (stac_client )
132142
133143 if not collections :
134144 return {
@@ -143,7 +153,7 @@ async def load_catalog(self, catalog_url: str) -> Dict[str, Any]:
143153 )
144154
145155 # Store in vector database
146- self .store_in_vector_db (collections , chroma_collection )
156+ await self .store_in_vector_db (collections , chroma_collection )
147157
148158 logger .info (
149159 f"Successfully indexed { len (collections )} collections from { catalog_url } "
0 commit comments