Skip to content

Commit c648381

Browse files
committed
use the right index for the model stage
1 parent b3f062e commit c648381

File tree

6 files changed

+63
-34
lines changed

6 files changed

+63
-34
lines changed

llm-complete-guide/deployment_hf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323

2424
def predict(message, history):
2525
try:
26+
# add the prod flag here
2627
return process_input_with_retrieval(
2728
input=message,
2829
n_items_retrieved=20,
2930
use_reranking=True,
31+
model_version_stage="production",
3032
)
3133
except Exception as e:
3234
logger.error(f"Error processing message: {e}")

llm-complete-guide/run.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,9 @@ def main(
232232
raise click.UsageError(
233233
"--query-text is required when using 'query' command"
234234
)
235+
# add the prod flag here
235236
response = process_input_with_retrieval(
236-
query_text, model=model, use_reranking=use_reranker
237+
query_text, model=model, use_reranking=use_reranker, model_version_stage="production"
237238
)
238239
console = Console()
239240
md = Markdown(response)

llm-complete-guide/steps/eval_retrieval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ def query_similar_docs(
9696
if vector_store_name == "pgvector":
9797
conn = get_db_conn()
9898
elif vector_store_name == "pinecone":
99-
pinecone_index = get_pinecone_client()
99+
# in pipeline runs, always use staging index
100+
pinecone_index = get_pinecone_client(model_version_stage="staging")
100101
else:
101102
es_client = get_es_client()
102103

llm-complete-guide/steps/populate_index.py

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@
4848
from PIL import Image, ImageDraw, ImageFont
4949
from sentence_transformers import SentenceTransformer
5050
from structures import Document
51-
from utils.llm_utils import get_db_conn, get_es_client, split_documents
52-
from zenml import ArtifactConfig, log_metadata, step
51+
from utils.llm_utils import get_db_conn, get_es_client, get_pinecone_client, split_documents
52+
from zenml import ArtifactConfig, get_step_context, log_metadata, step
5353
from zenml.client import Client
5454
from zenml.metadata.metadata_types import Uri
5555
import pinecone
@@ -642,12 +642,15 @@ def index_generator(
642642
documents (str): JSON string containing the documents to index.
643643
index_type (IndexType, optional): Type of index to generate. Defaults to IndexType.POSTGRES.
644644
"""
645+
# get model version
646+
context = get_step_context()
647+
model_version_stage = context.model_version.stage
645648
if index_type == IndexType.ELASTICSEARCH:
646649
_index_generator_elastic(documents)
647650
elif index_type == IndexType.POSTGRES:
648651
_index_generator_postgres(documents)
649652
elif index_type == IndexType.PINECONE:
650-
_index_generator_pinecone(documents)
653+
_index_generator_pinecone(documents, model_version_stage)
651654
else:
652655
raise ValueError(f"Unknown index type: {index_type}")
653656

@@ -822,33 +825,14 @@ def _index_generator_postgres(documents: str) -> None:
822825
conn.close()
823826

824827

825-
def _index_generator_pinecone(documents: str) -> None:
828+
def _index_generator_pinecone(documents: str, model_version_stage: str) -> None:
826829
"""Generates a Pinecone index for the given documents.
827830
828831
Args:
829832
documents (str): JSON string containing the documents to index.
833+
model_version_stage (str): The stage of the model version (e.g. "staging" or "production").
830834
"""
831-
client = Client()
832-
pinecone_api_key = client.get_secret(SECRET_NAME_PINECONE).secret_values["pinecone_api_key"]
833-
index_name = client.get_secret(SECRET_NAME_PINECONE).secret_values.get("pinecone_index", "zenml-docs")
834-
835-
# Initialize Pinecone
836-
pc = Pinecone(api_key=pinecone_api_key)
837-
838-
# Create index if it doesn't exist
839-
if index_name not in pc.list_indexes().names():
840-
pc.create_index(
841-
name=index_name,
842-
dimension=EMBEDDING_DIMENSIONALITY,
843-
metric="cosine",
844-
spec=ServerlessSpec(
845-
cloud="aws",
846-
region="us-east-1"
847-
)
848-
)
849-
850-
# Get the index
851-
index = pc.Index(index_name)
835+
index = get_pinecone_client(model_version_stage=model_version_stage)
852836

853837
# Load documents
854838
docs = json.loads(documents)
@@ -886,7 +870,7 @@ def _index_generator_pinecone(documents: str) -> None:
886870
if batch:
887871
index.upsert(vectors=batch)
888872

889-
logger.info(f"Successfully indexed {len(docs)} documents to Pinecone index '{index_name}'")
873+
logger.info(f"Successfully indexed {len(docs)} documents to Pinecone index")
890874

891875

892876
def _log_metadata(index_type: IndexType) -> None:

llm-complete-guide/steps/rag_deployment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@
5656

5757

5858
def predict(message, history):
59+
# add the prod flag here
5960
return process_input_with_retrieval(
6061
input=message,
6162
n_items_retrieved=20,
6263
use_reranking=True,
64+
model_version_stage="production",
6365
)
6466

6567

llm-complete-guide/utils/llm_utils.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
from utils.openai_utils import get_openai_api_key
2929
import pinecone
30-
from pinecone import Pinecone
30+
from pinecone import Pinecone, ServerlessSpec
3131
# Configure logging levels for specific modules
3232
logging.getLogger("pytorch").setLevel(logging.CRITICAL)
3333
logging.getLogger("sentence-transformers").setLevel(logging.CRITICAL)
@@ -45,6 +45,7 @@
4545
import psycopg2
4646
import tiktoken
4747
from constants import (
48+
EMBEDDING_DIMENSIONALITY,
4849
EMBEDDINGS_MODEL,
4950
MODEL_NAME_MAP,
5051
OPENAI_MODEL,
@@ -279,17 +280,54 @@ def get_db_conn() -> connection:
279280
raise
280281

281282

282-
def get_pinecone_client() -> pinecone.Index:
283+
def get_pinecone_client(model_version_stage: str = "staging") -> pinecone.Index:
283284
"""Get a Pinecone index client.
284285
285286
Returns:
286287
pinecone.Index: A Pinecone index client.
287288
"""
288289
client = Client()
289290
pinecone_api_key = client.get_secret(SECRET_NAME_PINECONE).secret_values["pinecone_api_key"]
290-
index_name = client.get_secret(SECRET_NAME_PINECONE).secret_values.get("pinecone_index", "zenml-docs")
291-
292291
pc = Pinecone(api_key=pinecone_api_key)
292+
293+
# if the model version is staging, we check if any index name is associated as metadata
294+
# if not, create a new one with the name from the secret and attach it to the metadata
295+
# if the model version is production, we just use the index name from the metadata attached to it
296+
# raise error if there is no index name attached to the metadata
297+
model_version = client.get_model_version(
298+
model_name_or_id=ZENML_CHATBOT_MODEL_NAME,
299+
model_version_name_or_number_or_id=model_version_stage,
300+
)
301+
302+
if model_version_stage == "staging":
303+
try:
304+
index_name = model_version.run_metadata["vector_store"]["index_name"]
305+
except KeyError:
306+
index_name = client.get_secret(SECRET_NAME_PINECONE).secret_values.get("pinecone_index", "zenml-docs-dev")
307+
model_version.run_metadata["vector_store"]["index_name"] = index_name
308+
309+
# Create index if it doesn't exist
310+
if index_name not in pc.list_indexes().names():
311+
pc.create_index(
312+
name=index_name,
313+
dimension=EMBEDDING_DIMENSIONALITY,
314+
metric="cosine",
315+
spec=ServerlessSpec(
316+
cloud="aws",
317+
region="us-east-1"
318+
)
319+
)
320+
321+
if model_version_stage == "production":
322+
try:
323+
index_name = model_version.run_metadata["vector_store"]["index_name"]
324+
except KeyError:
325+
raise ValueError("The production model version should have an index name attached to it. None found.")
326+
327+
# if index doesn't exist, raise error
328+
if index_name not in pc.list_indexes().names():
329+
raise ValueError(f"The index {index_name} attached to the production model version does not exist. Please create it first.")
330+
293331
return pc.Index(index_name)
294332

295333

@@ -579,6 +617,7 @@ def process_input_with_retrieval(
579617
model: str = OPENAI_MODEL,
580618
n_items_retrieved: int = 20,
581619
use_reranking: bool = False,
620+
model_version_stage: str = "staging",
582621
) -> str:
583622
"""Process the input with retrieval.
584623
@@ -590,7 +629,7 @@ def process_input_with_retrieval(
590629
the database. Defaults to 5.
591630
use_reranking (bool, optional): Whether to use reranking. Defaults to
592631
False.
593-
632+
model_version_stage (str, optional): The stage of the model version. Defaults to "staging".
594633
Returns:
595634
str: The processed output.
596635
"""
@@ -609,7 +648,7 @@ def process_input_with_retrieval(
609648
include_metadata=True,
610649
)
611650
elif vector_store == "pinecone":
612-
pinecone_index = get_pinecone_client()
651+
pinecone_index = get_pinecone_client(model_version_stage=model_version_stage)
613652
similar_docs = get_topn_similar_docs(
614653
query_embedding=query_embedding,
615654
pinecone_index=pinecone_index,

0 commit comments

Comments
 (0)