
Commit 782164d

Merge branch 'feature/pinecone-langsmith-llm-complete' into feature/evals-from-langfuse
2 parents: a70afa0 + a597a43

File tree

7 files changed: +76 −51 lines

llm-complete-guide/README.md

Lines changed: 39 additions & 6 deletions

````diff
@@ -18,7 +18,7 @@ using ZenML, enabling you to build powerful, scalable, and maintainable
 LLM-powered applications.
 
 This project contains all the pipeline and step code necessary to follow along
-with the guide. You'll need a PostgreSQL database to store the embeddings; full
+with the guide. You'll need a vector store to store the embeddings; full
 instructions are provided below for how to set that up.
 
 ## 📽️ Watch the webinars
@@ -55,7 +55,40 @@ zenml secret create llm-complete --openai_api_key=<your-openai-api-key>
 export ZENML_PROJECT_SECRET_NAME=llm-complete
 ```
 
-### Setting up Supabase
+### Setting up Pinecone
+
+[Pinecone](https://www.pinecone.io/) is the default vector store used in this project. It's a cloud-native vector database that's optimized for machine learning applications. You'll need to create a Pinecone account and get an API key to use it.
+
+Once you have your Pinecone account set up, you'll need to store your API key and index name as a ZenML secret (with name `pinecone-zenml`). You can do this by running the following command:
+
+```shell
+zenml secret create pinecone-zenml --pinecone_api_key=<YOUR_PINECONE_API_KEY> --pinecone_env=<YOUR_PINECONE_ENV> --pinecone_index=<YOUR_INDEX_NAME>
+```
+
+The `pinecone_index` value you specify will be used for all your development pipeline runs. When you promote your ZenML model to production and run your ingestion pipeline again, it will automatically create a new production index called `<YOUR_INDEX_NAME>-prod`. This separation ensures that your development and production environments remain isolated.
+
+### Choosing Your Vector Store
+
+While Pinecone is the default vector store, this project supports multiple vector stores. You can choose between:
+
+1. **Pinecone** (default): A cloud-native vector database optimized for machine learning applications
+2. **PostgreSQL with pgvector**: A local or cloud PostgreSQL database with vector similarity search capabilities
+3. **Elasticsearch**: A distributed search engine with vector search support
+
+To switch between vector stores, you need to create or modify a pipeline configuration file (e.g., `configs/dev/rag.yaml`) and set the `index_type` parameter for the `index_generator` step. For example:
+
+```yaml
+steps:
+  index_generator:
+    parameters:
+      index_type: pinecone # Options: pinecone, postgres, elasticsearch
+```
+
+This configuration will be used by both the basic RAG and RAG pipelines. Each vector store requires its own setup and credentials as described in their respective sections below.
+
+### Alternative: Setting up Supabase
+
+While Pinecone is the default vector store, you can still use Supabase's PostgreSQL database as an alternative.
 
 [Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
 database. It's simple to use and has a free tier that should be sufficient for
@@ -76,7 +109,7 @@ string from the Supabase dashboard.
 
 ![](.assets/supabase-connection-string.png)
 
-In case Supabase is not an option for you, you can use a different database as the backend.
+In case neither Pinecone nor Supabase is an option for you, you can use a different database as the backend.
 
 ### Running the RAG pipeline
 
@@ -89,12 +122,12 @@ python run.py rag
 ```
 
 This will run the basic RAG pipeline, which scrapes the ZenML documentation and
-stores the embeddings in the Supabase database.
+stores the embeddings in your configured vector store (Pinecone by default).
 
 ### Querying your RAG pipeline assets
 
-Once the pipeline has run successfully, you can query the assets in the Supabase
-database using the `--query` flag as well as passing in the model you'd like to
+Once the pipeline has run successfully, you can query the assets in your vector store
+using the `--query` flag as well as passing in the model you'd like to
 use for the LLM.
 
 When you're ready to make the query, run the following command:
````
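The dev/prod index naming convention the README describes can be sketched as a small pure function (the helper name here is hypothetical, for illustration only):

```python
def pinecone_index_for_stage(base_index: str, stage: str) -> str:
    """Map the `pinecone_index` secret value to the index actually used.

    Development runs use the configured name as-is; a model promoted to
    production gets a separate `<name>-prod` index, keeping the two
    environments isolated.
    """
    return f"{base_index}-prod" if stage == "production" else base_index
```

A staging run against a secret value of `zenml-docs` would therefore target `zenml-docs`, while a production run targets `zenml-docs-prod`.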

llm-complete-guide/pipelines/llm_basic_rag.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -25,7 +25,7 @@
 from zenml import pipeline
 
 
-@pipeline
+@pipeline(enable_cache=True)
 def llm_basic_rag() -> None:
     """Executes the pipeline to train a basic RAG model.
```

llm-complete-guide/pipelines/llm_eval.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -28,7 +28,7 @@
 from zenml import pipeline
 
 
-@pipeline(enable_cache=False)
+@pipeline(enable_cache=True)
 def llm_eval(after: Optional[str] = None) -> None:
     """Executes the pipeline to evaluate a RAG pipeline."""
     # Retrieval evals
```

llm-complete-guide/steps/eval_retrieval.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -97,7 +97,7 @@ def query_similar_docs(
         conn = get_db_conn()
     elif vector_store_name == "pinecone":
         # in pipeline runs, always use staging index
-        pinecone_index = get_pinecone_client(model_version_stage="staging")
+        pinecone_index = get_pinecone_client(model_version_name_or_id="staging")
     else:
         es_client = get_es_client()
```

llm-complete-guide/steps/populate_index.py

Lines changed: 6 additions & 6 deletions

```diff
@@ -648,13 +648,15 @@ def index_generator(
     """
     # get model version
     context = get_step_context()
-    model_version_stage = context.model_version.stage
+    model_version_name_or_id = context.model_version.name
+    if context.model_version.stage == "production":
+        model_version_name_or_id = "production"
     if index_type == IndexType.ELASTICSEARCH:
         _index_generator_elastic(documents)
     elif index_type == IndexType.POSTGRES:
         _index_generator_postgres(documents)
     elif index_type == IndexType.PINECONE:
-        _index_generator_pinecone(documents, model_version_stage)
+        _index_generator_pinecone(documents, model_version_name_or_id)
     else:
         raise ValueError(f"Unknown index type: {index_type}")
@@ -829,16 +831,14 @@ def _index_generator_postgres(documents: str) -> None:
     conn.close()
 
 
-def _index_generator_pinecone(
-    documents: str, model_version_stage: str
-) -> None:
+def _index_generator_pinecone(documents: str, model_version_name_or_id: str) -> None:
     """Generates a Pinecone index for the given documents.
 
     Args:
         documents (str): JSON string containing the documents to index.
         model_version (str): Name of the model version.
     """
-    index = get_pinecone_client(model_version_stage=model_version_stage)
+    index = get_pinecone_client(model_version_name_or_id=model_version_name_or_id)
 
     # Load documents
     docs = json.loads(documents)
```
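The new model-version resolution in `index_generator` reduces to a small pure function (the helper name below is hypothetical, sketched here for illustration):

```python
from typing import Optional


def resolve_model_version_ref(version_name: str, stage: Optional[str]) -> str:
    """Pick the reference passed down to the Pinecone helpers.

    Ordinary runs pass the model version's name through; once the
    version has been promoted to the "production" stage, the literal
    stage name is used instead so the production index is targeted.
    """
    return "production" if stage == "production" else version_name
```

This is why a promoted model run hits the `-prod` index while every other run stays keyed to its own version name.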

llm-complete-guide/steps/url_scraper.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -53,9 +53,10 @@ def url_scraper(
     else:
         docs_urls = get_all_pages(docs_url)
 
-    # website_urls = get_all_pages(website_url)
+    website_urls = get_all_pages(website_url)
     # all_urls = docs_urls + website_urls + examples_readme_urls
-    all_urls = docs_urls
+    # all_urls = website_urls
+    all_urls = ["https://zenml.io"]
     log_metadata(
         metadata={
             "count": len(all_urls),
```

llm-complete-guide/utils/llm_utils.py

Lines changed: 25 additions & 34 deletions

```diff
@@ -285,9 +285,7 @@ def get_db_conn() -> connection:
         raise
 
 
-def get_pinecone_client(
-    model_version_stage: str = "staging",
-) -> pinecone.Index:
+def get_pinecone_client(model_version_name_or_id: str = "dev") -> pinecone.Index:
     """Get a Pinecone index client.
 
     Returns:
@@ -305,24 +303,35 @@ def get_pinecone_client(
     # raise error if there is no index name attached to the metadata
     model_version = client.get_model_version(
         model_name_or_id=ZENML_CHATBOT_MODEL_NAME,
-        model_version_name_or_number_or_id=model_version_stage,
+        model_version_name_or_number_or_id=model_version_name_or_id,
     )
 
-    if model_version_stage == "staging":
+    index_name_from_secret = client.get_secret(SECRET_NAME_PINECONE).secret_values.get("pinecone_index", "zenml-docs")
+
+    if model_version_name_or_id == "production":
+        index_name = f"{index_name_from_secret}-prod"
+
+        model_version.run_metadata["vector_store"]["index_name"] = index_name
+
+        # delete index if it exists
+        if index_name in pc.list_indexes().names():
+            pc.delete_index(index_name)
+
+        # create index
+        pc.create_index(
+            name=index_name,
+            dimension=EMBEDDING_DIMENSIONALITY,
+            metric="cosine",
+            spec=ServerlessSpec(cloud="aws", region="us-east-1")
+        )
+    else:
         try:
             index_name = model_version.run_metadata["vector_store"][
                 "index_name"
             ]
         except KeyError:
-            index_name = client.get_secret(
-                SECRET_NAME_PINECONE
-            ).secret_values.get("pinecone_index", "zenml-docs-dev")
-            # if index by that name exists already, create a new one with a random suffix
-            if index_name in pc.list_indexes().names():
-                index_name = f"{index_name}-{uuid.uuid4()}"
-            model_version.run_metadata["vector_store"]["index_name"] = (
-                index_name
-            )
+            index_name = index_name_from_secret
+            model_version.run_metadata["vector_store"]["index_name"] = index_name
 
     # Create index if it doesn't exist
     if index_name not in pc.list_indexes().names():
@@ -332,23 +341,7 @@ def get_pinecone_client(
             metric="cosine",
             spec=ServerlessSpec(cloud="aws", region="us-east-1"),
         )
-
-    if model_version_stage == "production":
-        try:
-            index_name = model_version.run_metadata["vector_store"][
-                "index_name"
-            ]
-        except KeyError:
-            raise ValueError(
-                "The production model version should have an index name attached to it. None found."
-            )
-
-        # if index doesn't exist, raise error
-        if index_name not in pc.list_indexes().names():
-            raise ValueError(
-                f"The index {index_name} attached to the production model version does not exist. Please create it first."
-            )
-
+
     return pc.Index(index_name)
 
 
@@ -679,9 +672,7 @@ def process_input_with_retrieval(
             include_metadata=True,
         )
     elif vector_store == "pinecone":
-        pinecone_index = get_pinecone_client(
-            model_version_stage=model_version_stage
-        )
+        pinecone_index = get_pinecone_client(model_version_name_or_id=model_version_stage)
         similar_docs = get_topn_similar_docs(
             query_embedding=query_embedding,
             pinecone_index=pinecone_index,
```
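Stripped of the Pinecone API calls, the index-selection logic this change gives `get_pinecone_client` looks roughly like the following pure function (a sketch under assumed names, not the actual implementation):

```python
from typing import Optional, Tuple


def plan_pinecone_index(
    version_ref: str,
    secret_index: str,
    metadata_index: Optional[str],
) -> Tuple[str, bool]:
    """Hypothetical sketch of the index-selection logic.

    Returns the index name to use and whether it should be rebuilt from
    scratch. A "production" reference always targets `<secret>-prod` and
    recreates it; any other reference prefers the name recorded in the
    model version's run metadata and falls back to the secret's
    `pinecone_index` value.
    """
    if version_ref == "production":
        return f"{secret_index}-prod", True
    return metadata_index or secret_index, False
```

Note the trade-off this encodes: promoting to production deliberately deletes and recreates the `-prod` index, whereas non-production runs reuse whatever index already exists.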
