55from sentence_transformers import SentenceTransformer
66import chromadb
77from pystac_client import Client
8+ import os
9+ import logging
10+
11+ # Configure logging
12+ logging .basicConfig (level = logging .INFO )
13+ logger = logging .getLogger (__name__ )
814
915# Constants
1016MODEL_NAME = "all-MiniLM-L6-v2"
11- DATA_PATH = " data/chromadb"
17+ DATA_PATH = os . environ . get ( "DATA_PATH" , " data/chromadb")
1218
1319
1420def load_data (catalog_url , catalog_name ):
1521 """Load STAC collections into the vector database"""
16- print ("Initializing vector database..." )
22+ logger . info ("Initializing vector database..." )
1723
1824 # Initialize the model
1925 model = SentenceTransformer (MODEL_NAME )
@@ -27,14 +33,14 @@ def load_data(catalog_url, catalog_name):
2733 # Initialize STAC client
2834 stac_client = Client .open (catalog_url )
2935
30- print ("Fetching STAC collections..." )
36+ logger . info ("Fetching STAC collections..." )
3137 collections = fetch_collections (stac_client )
32- print (f"Found { len (collections )} collections" )
38+ logger . info (f"Found { len (collections )} collections" )
3339
34- print ("Generating embeddings and storing in vector database..." )
40+ logger . info ("Generating embeddings and storing in vector database..." )
3541 store_in_vector_db (collections , model , chroma_collection )
3642
37- print ("Data loading complete!" )
43+ logger . info ("Data loading complete!" )
3844
3945
4046def fetch_collections (stac_client ):
@@ -73,8 +79,15 @@ def store_in_vector_db(collections, model, chroma_collection):
7379
7480
7581if __name__ == "__main__" :
76- load_data (catalog_url = "https://stac.eoapi.dev/" , catalog_name = "eoapi.dev" )
77- load_data (
78- catalog_url = "https://planetarycomputer.microsoft.com/api/stac/v1" ,
79- catalog_name = "planetarycomputer" ,
82+ # load_data(catalog_url="https://stac.eoapi.dev/", catalog_name="eoapi.dev")
83+ # load_data(
84+ # catalog_url="https://planetarycomputer.microsoft.com/api/stac/v1",
85+ # catalog_name="planetarycomputer",
86+ # )
87+ import os
88+
89+ STAC_CATALOG_URL = os .environ .get (
90+ "STAC_CATALOG_URL" , "https://planetarycomputer.microsoft.com/api/stac/v1"
8091 )
92+ STAC_CATALOG_NAME = os .environ .get ("STAC_CATALOG_NAME" , "planetarycomputer" )
93+ load_data (catalog_url = STAC_CATALOG_URL , catalog_name = STAC_CATALOG_NAME )
0 commit comments