Merged

24 commits
49f02a3
fix url scraper
strickvl Jan 31, 2025
c51ab4a
update requirements
strickvl Jan 31, 2025
0bec7d4
fix outdated code
strickvl Jan 31, 2025
a7105fb
Update ZenML model version and fix vector store metadata access
strickvl Jan 31, 2025
22291fd
Upgrade ZenML requirement to version 0.73.0
strickvl Jan 31, 2025
5b392fe
Change default index type to Postgres in index generator
strickvl Jan 31, 2025
7b23505
update constants
strickvl Jan 31, 2025
38a5b7c
Add log_metadata import from ZenML in evaluation step
strickvl Jan 31, 2025
f66335e
Suppress FutureWarning and refactor logging in eval and index steps
strickvl Jan 31, 2025
85fb182
formatting
strickvl Jan 31, 2025
4261dc5
run evals in parallel
strickvl Jan 31, 2025
90907f3
Add tenacity to requirements for improved retry handling
strickvl Jan 31, 2025
b82f6cf
run tests in parallel
strickvl Jan 31, 2025
4e2aaa3
Add LLM-judged evaluation function for RAG tests
strickvl Jan 31, 2025
969faa8
Add metadata logging for comprehensive evaluation metrics
strickvl Jan 31, 2025
7100c07
Clean up imports and remove unused imports in evaluation steps
strickvl Jan 31, 2025
31403b2
Remove commented section in RAG configuration file
strickvl Jan 31, 2025
35f6634
Adjust default temperature for OpenAI model completion
strickvl Jan 31, 2025
af301c6
make query via CLI work again
strickvl Jan 31, 2025
da289e2
Update deployment command in README for simplified RAG pipeline deplo…
strickvl Jan 31, 2025
6759c4a
Add type safety for ZenML secrets in Hugging Face deployment
strickvl Jan 31, 2025
ca00f7f
Add Elasticsearch and Tenacity to project requirements
strickvl Jan 31, 2025
fff3b36
Update ZenML chatbot model constants and improve vector store retrieval
strickvl Jan 31, 2025
bf2270d
Fix deployment :)
strickvl Jan 31, 2025
10 changes: 2 additions & 8 deletions llm-complete-guide/README.md
@@ -100,7 +100,7 @@ use for the LLM.
When you're ready to make the query, run the following command:

```shell
-python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
+python run.py query --query-text "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
```

Alternative options for LLMs to use include:
@@ -147,13 +147,7 @@ export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-com
To deploy the RAG pipeline, you can use the following command:

```shell
-python run.py --deploy
-```
-
-Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
-
-```shell
-python run.py --rag --deploy
+python run.py deploy
```

This will open a Hugging Face space in your browser where you can interact with
1 change: 0 additions & 1 deletion llm-complete-guide/configs/dev/rag.yaml
@@ -1,6 +1,5 @@
enable_cache: False

-# environment configuration
settings:
  docker:
    requirements:
6 changes: 3 additions & 3 deletions llm-complete-guide/constants.py
@@ -17,7 +17,7 @@
import os

# Vector Store constants
-CHUNK_SIZE = 2000
+CHUNK_SIZE = 1000
CHUNK_OVERLAP = 50
EMBEDDING_DIMENSIONALITY = (
    384  # Update this to match the dimensionality of the new model
@@ -35,8 +35,8 @@
MODEL_NAME_MAP = {
    "gpt4": "gpt-4",
    "gpt35": "gpt-3.5-turbo",
-    "claude3": "claude-3-opus-20240229",
-    "claudehaiku": "claude-3-haiku-20240307",
+    "claude3": "claude-3-5-sonnet-latest",
+    "claudehaiku": "claude-3-5-haiku-latest",
}

# CHUNKING_METHOD = "split-by-document"
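For context, CHUNK_SIZE and CHUNK_OVERLAP drive a sliding-window chunker: each chunk holds at most CHUNK_SIZE characters and shares CHUNK_OVERLAP characters with its neighbor. A minimal sketch, assuming character-based splitting (the helper name and splitting logic are illustrative, not this repository's implementation):

```python
# Hypothetical sketch of overlap chunking; the real pipeline may split
# on tokens or document structure instead of raw characters.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 50


def chunk_text(text: str) -> list[str]:
    """Split text into overlapping chunks of at most CHUNK_SIZE characters."""
    step = CHUNK_SIZE - CHUNK_OVERLAP  # each window advances by 950 chars
    return [text[i : i + CHUNK_SIZE] for i in range(0, len(text), step)]


print(len(chunk_text("a" * 2500)))  # -> 3 chunks, adjacent ones sharing 50 chars
```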
14 changes: 6 additions & 8 deletions llm-complete-guide/gh_action_rag.py
@@ -21,12 +21,10 @@

import click
import yaml
-from zenml.enums import PluginSubType

from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
-from zenml.client import Client
from zenml import Model
-from zenml.exceptions import ZenKeyError
+from zenml.client import Client
+from zenml.enums import PluginSubType


@click.command(
@@ -89,7 +87,7 @@ def main(
    zenml_model_name: Optional[str] = "zenml-docs-qa-rag",
    zenml_model_version: Optional[str] = None,
):
-    """
+    """
    Executes the pipeline to train a basic RAG model.

    Args:
@@ -108,14 +106,14 @@
        config = yaml.safe_load(file)

    # Read the model version from a file in the root of the repo
-    # called "ZENML_VERSION.txt".
+    # called "ZENML_VERSION.txt".
    if zenml_model_version == "staging":
        postfix = "-rc0"
    elif zenml_model_version == "production":
        postfix = ""
    else:
        postfix = "-dev"

    if Path("ZENML_VERSION.txt").exists():
        with open("ZENML_VERSION.txt", "r") as file:
            zenml_model_version = file.read().strip()
@@ -177,7 +175,7 @@ def main(
        service_account_id=service_account_id,
        auth_window=0,
        flavor="builtin",
-        action_type=PluginSubType.PIPELINE_RUN
+        action_type=PluginSubType.PIPELINE_RUN,
    ).id
    client.create_trigger(
        name="Production Trigger LLM-Complete",
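The staging/production branching above maps a deployment stage to a version postfix before reading ZENML_VERSION.txt. A minimal sketch of that resolution logic as a standalone helper; how the postfix is finally combined with the file's version is elided by the diff, so the return line and the fallback value are assumptions:

```python
from pathlib import Path


def resolve_model_version(stage: str | None) -> str:
    # "-rc0" for staging, no postfix for production, "-dev" otherwise,
    # mirroring the branching in gh_action_rag.py above.
    postfix = {"staging": "-rc0", "production": ""}.get(stage, "-dev")
    base = "0.1.0"  # hypothetical fallback; the script reads ZENML_VERSION.txt
    if Path("ZENML_VERSION.txt").exists():
        base = Path("ZENML_VERSION.txt").read_text().strip()
    return base + postfix
```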
2 changes: 1 addition & 1 deletion llm-complete-guide/pipelines/__init__.py
@@ -19,5 +19,5 @@
from pipelines.generate_chunk_questions import generate_chunk_questions
from pipelines.llm_basic_rag import llm_basic_rag
from pipelines.llm_eval import llm_eval
-from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
from pipelines.rag_deployment import rag_deployment
+from pipelines.llm_index_and_evaluate import llm_index_and_evaluate
1 change: 0 additions & 1 deletion llm-complete-guide/pipelines/finetune_embeddings.py
@@ -12,7 +12,6 @@
# or implied. See the License for the specific language governing
# permissions and limitations under the License.

-from constants import EMBEDDINGS_MODEL_NAME_ZENML
from steps.finetune_embeddings import (
    evaluate_base_model,
    evaluate_finetuned_model,
1 change: 0 additions & 1 deletion llm-complete-guide/pipelines/llm_basic_rag.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from litellm import config_path

from steps.populate_index import (
    generate_embeddings,
3 changes: 2 additions & 1 deletion llm-complete-guide/pipelines/llm_index_and_evaluate.py
@@ -15,9 +15,10 @@
# limitations under the License.
#

-from pipelines import llm_basic_rag, llm_eval
from zenml import pipeline
+
+from pipelines import llm_basic_rag, llm_eval


@pipeline
def llm_index_and_evaluate() -> None:
3 changes: 2 additions & 1 deletion llm-complete-guide/requirements.txt
@@ -1,4 +1,4 @@
-zenml[server]
+zenml[server]>=0.73.0
ratelimit
pgvector
psycopg2-binary
@@ -21,6 +21,7 @@ torch
gradio
huggingface-hub
elasticsearch
+tenacity

# optional requirements for S3 artifact store
# s3fs>2022.3.0
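tenacity, added above alongside elasticsearch, provides declarative retries. A minimal sketch of the retry pattern it enables, assuming an illustrative flaky call (nothing below is code from this PR):

```python
from tenacity import retry, stop_after_attempt, wait_exponential


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, max=10),
    reraise=True,  # surface the last exception instead of tenacity.RetryError
)
def call_llm(prompt: str) -> str:
    # Stand-in for a rate-limited completion call; any exception raised here
    # triggers an exponential-backoff retry, up to three attempts.
    raise RuntimeError("transient failure")


try:
    call_llm("hello")
except RuntimeError:
    print("gave up after 3 attempts")
```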
18 changes: 13 additions & 5 deletions llm-complete-guide/run.py
@@ -47,12 +47,12 @@
    generate_synthetic_data,
    llm_basic_rag,
    llm_eval,
-    rag_deployment,
    llm_index_and_evaluate,
+    rag_deployment,
)
from structures import Document
-from zenml.materializers.materializer_registry import materializer_registry
from zenml import Model
+from zenml.materializers.materializer_registry import materializer_registry

logger = get_logger(__name__)

@@ -136,6 +136,12 @@
    default=None,
    help="Path to config",
)
+@click.option(
+    "--query-text",
+    "query_text",
+    default=None,
+    help="Query text",
+)
def main(
    pipeline: str,
    query_text: Optional[str] = None,
@@ -169,9 +175,9 @@ def main(
            }
        },
    }

    # Read the model version from a file in the root of the repo
-    # called "ZENML_VERSION.txt".
+    # called "ZENML_VERSION.txt".
    if zenml_model_version == "staging":
        postfix = "-rc0"
    elif zenml_model_version == "production":
@@ -264,7 +270,9 @@ def main(

    elif pipeline == "embeddings":
        finetune_embeddings.with_options(
-            model=zenml_model, config_path=config_path, **embeddings_finetune_args
+            model=zenml_model,
+            config_path=config_path,
+            **embeddings_finetune_args,
        )()

    elif pipeline == "chunks":
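The new `--query-text` option follows click's standard pattern: the first argument is the flag and the second binds its value to the `query_text` parameter of `main`. A self-contained sketch of the same pattern (the command body is illustrative only, not this repository's `main`):

```python
from typing import Optional

import click


@click.command()
@click.option("--query-text", "query_text", default=None, help="Query text")
def main(query_text: Optional[str] = None) -> None:
    # click passes the flag's value through the binding name "query_text".
    click.echo(f"query: {query_text}")


if __name__ == "__main__":
    main()
```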
35 changes: 20 additions & 15 deletions llm-complete-guide/steps/eval_e2e.py
@@ -16,8 +16,17 @@

import json
import logging
+import warnings
from typing import Annotated, Callable, Tuple

+# Suppress the specific FutureWarning about clean_up_tokenization_spaces
+warnings.filterwarnings(
+    "ignore",
+    message=".*clean_up_tokenization_spaces.*",
+    category=FutureWarning,
+    module="transformers.tokenization_utils_base",
+)
+
from datasets import load_dataset
from litellm import completion
from pydantic import BaseModel, conint
@@ -315,13 +324,11 @@ def run_simple_tests(test_data: list, test_function: Callable) -> float:


@step
-def e2e_evaluation() -> (
-    Tuple[
-        Annotated[float, "failure_rate_bad_answers"],
-        Annotated[float, "failure_rate_bad_immediate_responses"],
-        Annotated[float, "failure_rate_good_responses"],
-    ]
-):
+def e2e_evaluation() -> Tuple[
+    Annotated[float, "failure_rate_bad_answers"],
+    Annotated[float, "failure_rate_bad_immediate_responses"],
+    Annotated[float, "failure_rate_good_responses"],
+]:
    """Executes the end-to-end evaluation step."""
    logging.info("Testing bad answers...")
    failure_rate_bad_answers = run_simple_tests(
@@ -352,14 +359,12 @@ def e2e_evaluation() -> (


@step
-def e2e_evaluation_llm_judged() -> (
-    Tuple[
-        Annotated[float, "average_toxicity_score"],
-        Annotated[float, "average_faithfulness_score"],
-        Annotated[float, "average_helpfulness_score"],
-        Annotated[float, "average_relevance_score"],
-    ]
-):
+def e2e_evaluation_llm_judged() -> Tuple[
+    Annotated[float, "average_toxicity_score"],
+    Annotated[float, "average_faithfulness_score"],
+    Annotated[float, "average_helpfulness_score"],
+    Annotated[float, "average_relevance_score"],
+]:
    """Executes the end-to-end evaluation step.

    Returns:
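The refactored signatures above use `Annotated` string labels, which is how ZenML names a step's multiple output artifacts. A minimal sketch of the pattern with placeholder metric values (not the evaluation logic from this PR):

```python
from typing import Annotated, Tuple

from zenml import step


@step
def toy_eval() -> Tuple[
    Annotated[float, "average_toxicity_score"],
    Annotated[float, "average_helpfulness_score"],
]:
    # Each Annotated label becomes the name of one output artifact in ZenML.
    return 0.1, 0.9
```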