Skip to content

Commit 896c89d

Browse files
committed
Iterate on pinecone changes
1 parent 21c4dac commit 896c89d

File tree

10 files changed

+89
-29
lines changed

10 files changed

+89
-29
lines changed

llm-complete-guide/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,14 @@ export ZENML_PROJECT_SECRET_NAME=llm-complete
5959

6060
[Pinecone](https://www.pinecone.io/) is the default vector store used in this project. It's a cloud-native vector database that's optimized for machine learning applications. You'll need to create a Pinecone account and get an API key to use it.
6161

62-
Once you have your Pinecone account set up, you'll need to store your API key and index name as a ZenML secret (with name `pinecone-zenml`). You can do this by running the following command:
62+
Once you have your Pinecone account set up, you'll need to store your API key and index name as a ZenML secret. You can do this by running the following command:
6363

6464
```shell
65-
zenml secret create pinecone-zenml --pinecone_api_key=<YOUR_PINECONE_API_KEY> --pinecone_env=<YOUR_PINECONE_ENV> --pinecone_index=<YOUR_INDEX_NAME>
65+
zenml secret update llm-complete -v '{"pinecone_api_key": "YOUR_PINECONE_API_KEY", "pinecone_env": "YOUR_PINECONE_ENV", "pinecone_index": "YOUR_INDEX_NAME"}'
66+
6667
```
6768

68-
The `pinecone_index` value you specify will be used for all your development pipeline runs. When you promote your ZenML model to production and run your ingestion pipeline again, it will automatically create a new production index called `<YOUR_INDEX_NAME>-prod`. This separation ensures that your development and production environments remain isolated.
69+
The `pinecone_index` value you specify will be used for all your development pipeline runs. Make sure the value consists only of lowercase alphanumeric characters and dashes, as Pinecone index names do not allow uppercase letters or other special characters. When you promote your ZenML model to production and run your ingestion pipeline again, it will automatically create a new production index called `<YOUR_INDEX_NAME>-prod`. This separation ensures that your development and production environments remain isolated.
6970

7071
### Choosing Your Vector Store
7172

llm-complete-guide/configs/dev/rag.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ steps:
3030
use_dev_set: true
3131
index_generator:
3232
parameters:
33-
index_type: postgres
33+
index_type: postgres # Options: pinecone, postgres, elasticsearch

llm-complete-guide/configs/dev/rag_eval.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ settings:
1515
- pygithub
1616
- elasticsearch
1717
python_package_installer: "uv"
18+
19+
steps:
20+
url_scraper:
21+
parameters:
22+
docs_url: https://docs.zenml.io/
23+
use_dev_set: true
24+
index_generator:
25+
parameters:
26+
index_type: postgres

llm-complete-guide/deployment_hf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
logger = logging.getLogger(__name__)
1313

1414
APP_ENVIRONMENT = os.getenv("GRADIO_ZENML_APP_ENVIRONMENT", "dev")
15+
print("ZENML_STORE_API_KEY set:", bool(os.getenv("ZENML_STORE_API_KEY")), "ZENML_STORE_URL:", os.getenv("ZENML_STORE_URL"))  # avoid printing the raw API key: it would leak the secret into logs
1516

1617
# Initialize ZenML client and verify secret access
1718
try:

llm-complete-guide/run.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# limitations under the License.
1616
import warnings
1717
from pathlib import Path
18+
import os
1819

1920
# Suppress the specific FutureWarning from huggingface_hub
2021
warnings.filterwarnings(
@@ -39,7 +40,7 @@
3940
from typing import Optional
4041

4142
import click
42-
from constants import OPENAI_MODEL
43+
from constants import OPENAI_MODEL, SECRET_NAME
4344
from materializers.document_materializer import DocumentMaterializer
4445
from pipelines import (
4546
finetune_embeddings,
@@ -54,9 +55,34 @@
5455
from structures import Document
5556
from zenml import Model
5657
from zenml.materializers.materializer_registry import materializer_registry
58+
from zenml.client import Client
5759

5860
logger = get_logger(__name__)
5961

62+
# First try to get from environment variables
63+
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
64+
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
65+
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST")
66+
67+
# If any are not set, get from ZenML secrets and set the env vars
68+
if not all([LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST]):
69+
secret = Client().get_secret(SECRET_NAME)
70+
71+
if not LANGFUSE_PUBLIC_KEY:
72+
LANGFUSE_PUBLIC_KEY = secret.secret_values.get("langfuse_public_key")
73+
if LANGFUSE_PUBLIC_KEY:
74+
os.environ["LANGFUSE_PUBLIC_KEY"] = LANGFUSE_PUBLIC_KEY
75+
76+
if not LANGFUSE_SECRET_KEY:
77+
LANGFUSE_SECRET_KEY = secret.secret_values.get("langfuse_secret_key")
78+
if LANGFUSE_SECRET_KEY:
79+
os.environ["LANGFUSE_SECRET_KEY"] = LANGFUSE_SECRET_KEY
80+
81+
if not LANGFUSE_HOST:
82+
LANGFUSE_HOST = secret.secret_values.get("langfuse_host")
83+
if LANGFUSE_HOST:
84+
os.environ["LANGFUSE_HOST"] = LANGFUSE_HOST
85+
6086

6187
@click.command(
6288
help="""

llm-complete-guide/steps/eval_retrieval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ def process_single_pair(
140140
question, url_ending, urls = query_similar_docs(
141141
pair["question"], pair["url_ending"], use_reranking
142142
)
143+
print(f"question: {question}, url_ending: {url_ending}, urls: {urls}")
143144
is_failure = all(url_ending not in url for url in urls)
144145
return is_failure, question, url_ending, urls
145146

@@ -189,7 +190,7 @@ def process_with_progress(
189190
)
190191

191192
results = []
192-
with Pool(processes=n_processes) as pool:
193+
with Pool(processes=n_processes) as pool:
193194
for i, result in enumerate(pool.imap(worker_fn, items), 1):
194195
results.append(result)
195196
logger.info(f"Completed {i}/{len(items)} tests")

llm-complete-guide/steps/populate_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ def _log_metadata(index_type: IndexType) -> None:
927927
connection_details = {
928928
"api_key": "**********",
929929
"environment": client.get_secret(
930-
SECRET_NAME_PINECONE
930+
SECRET_NAME
931931
).secret_values["pinecone_env"],
932932
}
933933

llm-complete-guide/steps/rag_deployment.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,17 @@
1010
from zenml.integrations.registry import integration_registry
1111

1212
# Try to get from environment first, otherwise fall back to secret store
13-
ZENML_API_TOKEN = os.environ.get("ZENML_API_TOKEN")
13+
ZENML_STORE_API_KEY = os.environ.get("ZENML_STORE_API_KEY")
1414
ZENML_STORE_URL = os.environ.get("ZENML_STORE_URL")
1515

1616
secret = Client().get_secret(SECRET_NAME)
1717

18-
if not ZENML_API_TOKEN or not ZENML_STORE_URL:
18+
if not ZENML_STORE_API_KEY or not ZENML_STORE_URL:
1919
# Get ZenML server URL and API token from the secret store
20-
ZENML_API_TOKEN = ZENML_API_TOKEN or secret.secret_values.get(
21-
"zenml_api_token"
20+
ZENML_STORE_API_KEY = ZENML_STORE_API_KEY or secret.secret_values.get(
21+
"zenml_store_api_token"
2222
)
23-
ZENML_STORE_URL = ZENML_STORE_URL or secret.secret_values.get(
24-
"zenml_store_url"
25-
)
26-
23+
ZENML_STORE_URL = ZENML_STORE_URL or secret.secret_values.get("zenml_store_url")
2724

2825
LANGFUSE_PUBLIC_KEY = os.environ.get(
2926
"LANGFUSE_PUBLIC_KEY", secret.secret_values.get("LANGFUSE_PUBLIC_KEY")
@@ -110,7 +107,7 @@ def gradio_rag_deployment() -> None:
110107
Starts a web server with a chat interface that echoes back user messages.
111108
The server runs indefinitely until manually stopped.
112109
"""
113-
api = HfApi()
110+
api = HfApi(token=get_hf_token())
114111
api.create_repo(
115112
repo_id=hf_repo_id,
116113
repo_type="space",
@@ -119,13 +116,12 @@ def gradio_rag_deployment() -> None:
119116
exist_ok=True,
120117
token=get_hf_token(),
121118
)
122-
123119
# Ensure values are strings
124-
if ZENML_API_TOKEN is not None:
120+
if ZENML_STORE_API_KEY is not None:
125121
api.add_space_secret(
126122
repo_id=hf_repo_id,
127123
key="ZENML_STORE_API_KEY",
128-
value=str(ZENML_API_TOKEN),
124+
value=str(ZENML_STORE_API_KEY),
129125
)
130126

131127
if ZENML_STORE_URL is not None:

llm-complete-guide/steps/url_scraper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def url_scraper(
5454
docs_urls = get_all_pages(docs_url)
5555

5656
website_urls = get_all_pages(website_url)
57-
# all_urls = docs_urls + website_urls + examples_readme_urls
57+
# all_urls = docs_urls + website_urls
5858
# all_urls = website_urls
5959
all_urls = ["https://zenml.io"]
6060
log_metadata(

llm-complete-guide/utils/llm_utils.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,30 @@
6464

6565
logger = logging.getLogger(__name__)
6666

67+
# First try to get from environment variables
68+
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY")
69+
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY")
70+
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST")
71+
72+
# If any are not set, get from ZenML secrets and set the env vars
73+
if not all([LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST]):
74+
secret = Client().get_secret(SECRET_NAME)
75+
76+
if not LANGFUSE_PUBLIC_KEY:
77+
LANGFUSE_PUBLIC_KEY = secret.secret_values.get("langfuse_public_key")
78+
if LANGFUSE_PUBLIC_KEY:
79+
os.environ["LANGFUSE_PUBLIC_KEY"] = LANGFUSE_PUBLIC_KEY
80+
81+
if not LANGFUSE_SECRET_KEY:
82+
LANGFUSE_SECRET_KEY = secret.secret_values.get("langfuse_secret_key")
83+
if LANGFUSE_SECRET_KEY:
84+
os.environ["LANGFUSE_SECRET_KEY"] = LANGFUSE_SECRET_KEY
85+
86+
if not LANGFUSE_HOST:
87+
LANGFUSE_HOST = secret.secret_values.get("langfuse_host")
88+
if LANGFUSE_HOST:
89+
os.environ["LANGFUSE_HOST"] = LANGFUSE_HOST
90+
6791
# logs all litellm requests to langfuse
6892
litellm.callbacks = ["langfuse"]
6993

@@ -293,7 +317,7 @@ def get_pinecone_client(
293317
pinecone.Index: A Pinecone index client.
294318
"""
295319
client = Client()
296-
pinecone_api_key = client.get_secret(SECRET_NAME_PINECONE).secret_values[
320+
pinecone_api_key = client.get_secret(SECRET_NAME).secret_values[
297321
"pinecone_api_key"
298322
]
299323
pc = Pinecone(api_key=pinecone_api_key)
@@ -308,12 +332,15 @@ def get_pinecone_client(
308332
)
309333

310334
index_name_from_secret = client.get_secret(
311-
SECRET_NAME_PINECONE
312-
).secret_values.get("pinecone_index", "zenml-docs")
335+
SECRET_NAME).secret_values.get("pinecone_index", "zenml-docs")
313336

314337
if model_version_name_or_id == "production":
315338
index_name = f"{index_name_from_secret}-prod"
316339

340+
# Initialize vector_store metadata if it doesn't exist
341+
if "vector_store" not in model_version.run_metadata:
342+
model_version.run_metadata["vector_store"] = {}
343+
317344
model_version.run_metadata["vector_store"]["index_name"] = index_name
318345

319346
# delete index if it exists
@@ -329,14 +356,13 @@ def get_pinecone_client(
329356
)
330357
else:
331358
try:
332-
index_name = model_version.run_metadata["vector_store"][
333-
"index_name"
334-
]
359+
index_name = model_version.run_metadata["vector_store"]["index_name"]
335360
except KeyError:
336361
index_name = index_name_from_secret
337-
model_version.run_metadata["vector_store"]["index_name"] = (
338-
index_name
339-
)
362+
# Initialize vector_store metadata if it doesn't exist
363+
if "vector_store" not in model_version.run_metadata:
364+
model_version.run_metadata["vector_store"] = {}
365+
model_version.run_metadata["vector_store"]["index_name"] = index_name
340366

341367
# Create index if it doesn't exist
342368
if index_name not in pc.list_indexes().names():

0 commit comments

Comments
 (0)