@@ -31,10 +31,14 @@
 
 
 @step(enable_cache=False)
-def deployment_deploy() -> Annotated[
-    Optional[DatabricksDeploymentService],
-    ArtifactConfig(name="databricks_deployment", is_deployment_artifact=True),
-]:
+def deployment_deploy() -> (
+    Annotated[
+        Optional[DatabricksDeploymentService],
+        ArtifactConfig(
+            name="databricks_deployment", is_deployment_artifact=True
+        ),
+    ]
+):
     """Predictions step.
 
     This is an example of a predictions step that takes the data in and returns
1 change: 0 additions & 1 deletion end-to-end-computer-vision/steps/export_label_studio.py
@@ -28,7 +28,6 @@
 logger = get_logger(__name__)
 
 
-
 @step(
     output_materializers={
         LABELED_DATASET_NAME: LabelStudioAnnotationMaterializer
4 changes: 3 additions & 1 deletion eurorate-predictor/pipelines/training.py
@@ -24,7 +24,9 @@
 
 
 @pipeline
-def ecb_predictor_model_training_pipeline(augmented_dataset_id, mode: str = "develop"):
+def ecb_predictor_model_training_pipeline(
+    augmented_dataset_id, mode: str = "develop"
+):
     """A pipeline to train an XGBoost model and promote it.
 
     Args:
14 changes: 9 additions & 5 deletions eurorate-predictor/run.py
@@ -101,7 +101,9 @@ def main(
         pipeline_args["config_path"] = os.path.join(
             config_folder, f"etl_{mode}.yaml"
         )
-        ecb_predictor_etl_pipeline.with_options(**pipeline_args)(**run_args_etl)
+        ecb_predictor_etl_pipeline.with_options(**pipeline_args)(
+            **run_args_etl
+        )
         logger.info("ETL pipeline finished successfully!\n")
 
     # Execute Feature Engineering Pipeline
@@ -126,9 +128,9 @@ def main(
         pipeline_args["config_path"] = os.path.join(
             config_folder, f"feature_engineering_{mode}.yaml"
         )
-        ecb_predictor_feature_engineering_pipeline.with_options(**pipeline_args)(
-            **run_args_feature
-        )
+        ecb_predictor_feature_engineering_pipeline.with_options(
+            **pipeline_args
+        )(**run_args_feature)
         logger.info("Feature Engineering pipeline finished successfully!\n")
 
     # Execute Model Training Pipeline
@@ -153,7 +155,9 @@ def main(
         pipeline_args["config_path"] = os.path.join(
             config_folder, f"training_{mode}.yaml"
         )
-        ecb_predictor_model_training_pipeline.with_options(**pipeline_args)(**run_args_train)
+        ecb_predictor_model_training_pipeline.with_options(**pipeline_args)(
+            **run_args_train
+        )
         logger.info("Model Training pipeline finished successfully!\n")
 
 
3 changes: 1 addition & 2 deletions gamesense/steps/finetune.py
@@ -28,12 +28,11 @@
 from utils.loaders import load_base_model
 from utils.tokenizer import load_tokenizer
 from zenml import ArtifactConfig, step
+from zenml.client import Client
 from zenml.enums import ArtifactType
 from zenml.logger import get_logger
 from zenml.materializers import BuiltInMaterializer
 from zenml.utils.cuda_utils import cleanup_gpu_memory
-from zenml.client import Client
-
 
 logger = get_logger(__name__)
 
@@ -47,9 +47,7 @@ def deploy_to_huggingface(
     save_model_to_deploy.entrypoint()
 
     logger.info("Model saved locally. Pushing to HuggingFace...")
-    assert secret, (
-        "No secret found with name 'huggingface_creds'. Please create one with your `token`."
-    )
+    assert secret, "No secret found with name 'huggingface_creds'. Please create one with your `token`."
 
     token = secret.secret_values["token"]
     api = HfApi(token=token)
10 changes: 6 additions & 4 deletions huggingface-sagemaker/steps/promotion/promote_get_metrics.py
@@ -27,10 +27,12 @@
 
 
 @step
-def promote_get_metrics() -> Tuple[
-    Annotated[Dict[str, Any], "latest_metrics"],
-    Annotated[Dict[str, Any], "current_metrics"],
-]:
+def promote_get_metrics() -> (
+    Tuple[
+        Annotated[Dict[str, Any], "latest_metrics"],
+        Annotated[Dict[str, Any], "current_metrics"],
+    ]
+):
     """Get metrics for comparison for promoting a model.
 
     This is an example of a metric retrieval step. It is used to retrieve
2 changes: 1 addition & 1 deletion llm-complete-guide/pipelines/llm_eval.py
@@ -17,7 +17,7 @@
 from typing import Optional
 
 import click
-from steps.create_prompt import PROMPT, create_prompt
+from steps.create_prompt import PROMPT
 from steps.eval_e2e import e2e_evaluation, e2e_evaluation_llm_judged
 from steps.eval_retrieval import (
     retrieval_evaluation_full,
1 change: 1 addition & 0 deletions llm-complete-guide/steps/create_prompt.py
@@ -24,6 +24,7 @@
 answers. \
 """
 
+
 @step
 def create_prompt() -> str:
     """Create a prompt for the RAG pipeline."""
12 changes: 6 additions & 6 deletions llm-complete-guide/steps/eval_retrieval.py
@@ -275,9 +275,9 @@ def perform_small_retrieval_evaluation(use_reranking: bool) -> float:
 
 
 @step
-def retrieval_evaluation_small() -> Annotated[
-    float, "small_failure_rate_retrieval"
-]:
+def retrieval_evaluation_small() -> (
+    Annotated[float, "small_failure_rate_retrieval"]
+):
     """Executes the retrieval evaluation step without reranking.
 
     Returns:
@@ -287,9 +287,9 @@
 
 
 @step
-def retrieval_evaluation_small_with_reranking() -> Annotated[
-    float, "small_failure_rate_retrieval_reranking"
-]:
+def retrieval_evaluation_small_with_reranking() -> (
+    Annotated[float, "small_failure_rate_retrieval_reranking"]
+):
     """Executes the retrieval evaluation step with reranking.
 
     Returns:
117 changes: 60 additions & 57 deletions llm-complete-guide/steps/eval_visualisation.py
@@ -12,13 +12,13 @@
 # or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
-from typing import Annotated, Dict, List, Tuple
+from typing import Annotated, Dict, List
 
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from zenml import ArtifactConfig, get_step_context, log_metadata, step
+from zenml import get_step_context, log_metadata, step
 from zenml.types import HTMLString
 
 
 def create_plotly_bar_chart(
     labels: List[str],
     scores: List[float],
@@ -43,54 +43,66 @@ def create_plotly_bar_chart(
     """
     # Generate colors for bars
     if alternate_colors:
-        colors = ["rgba(66, 133, 244, 0.8)" if i % 2 == 0 else "rgba(219, 68, 55, 0.8)" for i in range(len(labels))]
+        colors = [
+            "rgba(66, 133, 244, 0.8)"
+            if i % 2 == 0
+            else "rgba(219, 68, 55, 0.8)"
+            for i in range(len(labels))
+        ]
     else:
         colors = ["rgba(66, 133, 244, 0.8)" for _ in range(len(labels))]
 
     # Prepare hover text
     if descriptions:
-        hover_text = [f"<b>{label}</b><br>Value: {score:.2f}<br>{descriptions.get(label, '')}"
-                      for label, score in zip(labels, scores)]
+        hover_text = [
+            f"<b>{label}</b><br>Value: {score:.2f}<br>{descriptions.get(label, '')}"
+            for label, score in zip(labels, scores)
+        ]
     else:
-        hover_text = [f"<b>{label}</b><br>Value: {score:.2f}" for label, score in zip(labels, scores)]
+        hover_text = [
+            f"<b>{label}</b><br>Value: {score:.2f}"
+            for label, score in zip(labels, scores)
+        ]
 
     # Create figure
     fig = go.Figure()
 
     fig.add_trace(
         go.Bar(
             y=labels,
             x=scores,
-            orientation='h',
+            orientation="h",
             marker_color=colors,
             text=[f"{score:.2f}" for score in scores],
-            textposition='auto',
+            textposition="auto",
             hovertext=hover_text,
-            hoverinfo='text',
+            hoverinfo="text",
         )
     )
 
     # Set layout
     max_value = max(scores) if scores else 5
-    xaxis_range = [0, 100] if percentage_scale else [0, max(5, max_value * 1.1)]
+    xaxis_range = (
+        [0, 100] if percentage_scale else [0, max(5, max_value * 1.1)]
+    )
     xaxis_title = "Percentage (%)" if percentage_scale else "Score"
 
     fig.update_layout(
         title=title,
         xaxis=dict(
             title=xaxis_title,
             range=xaxis_range,
             showgrid=True,
-            gridcolor='rgba(230, 230, 230, 0.8)',
+            gridcolor="rgba(230, 230, 230, 0.8)",
         ),
         yaxis=dict(
             autorange="reversed",  # Make labels read top-to-bottom
         ),
         margin=dict(l=20, r=20, t=60, b=20),
         height=max(300, 70 * len(labels)),
-        plot_bgcolor='rgba(255, 255, 255, 1)',
+        plot_bgcolor="rgba(255, 255, 255, 1)",
     )
 
     return fig
 
 
@@ -122,58 +134,49 @@ def generate_evaluation_html(
     """
     # Metric descriptions for hovering
     metric_descriptions = {
-        "Small Retrieval Eval Failure Rate":
-            "Percentage of small test cases where retrieval failed to find relevant documents.",
-        "Small Retrieval Eval Failure Rate Reranking":
-            "Percentage of small test cases where retrieval with reranking failed to find relevant documents.",
-        "Full Retrieval Eval Failure Rate":
-            "Percentage of all test cases where retrieval failed to find relevant documents.",
-        "Full Retrieval Eval Failure Rate Reranking":
-            "Percentage of all test cases where retrieval with reranking failed to find relevant documents.",
-        "Failure Rate Bad Answers":
-            "Percentage of responses that were factually incorrect or misleading.",
-        "Failure Rate Bad Immediate Responses":
-            "Percentage of immediate responses that did not adequately address the query.",
-        "Failure Rate Good Responses":
-            "Percentage of responses rated as good by evaluators.",
-        "Average Toxicity Score":
-            "Average score measuring harmful, offensive, or inappropriate content (lower is better).",
-        "Average Faithfulness Score":
-            "Average score measuring how accurately the response represents the source material (higher is better).",
-        "Average Helpfulness Score":
-            "Average score measuring the practical utility of responses to users (higher is better).",
-        "Average Relevance Score":
-            "Average score measuring how well responses address the specific query intent (higher is better).",
+        "Small Retrieval Eval Failure Rate": "Percentage of small test cases where retrieval failed to find relevant documents.",
+        "Small Retrieval Eval Failure Rate Reranking": "Percentage of small test cases where retrieval with reranking failed to find relevant documents.",
+        "Full Retrieval Eval Failure Rate": "Percentage of all test cases where retrieval failed to find relevant documents.",
+        "Full Retrieval Eval Failure Rate Reranking": "Percentage of all test cases where retrieval with reranking failed to find relevant documents.",
+        "Failure Rate Bad Answers": "Percentage of responses that were factually incorrect or misleading.",
+        "Failure Rate Bad Immediate Responses": "Percentage of immediate responses that did not adequately address the query.",
+        "Failure Rate Good Responses": "Percentage of responses rated as good by evaluators.",
+        "Average Toxicity Score": "Average score measuring harmful, offensive, or inappropriate content (lower is better).",
+        "Average Faithfulness Score": "Average score measuring how accurately the response represents the source material (higher is better).",
+        "Average Helpfulness Score": "Average score measuring the practical utility of responses to users (higher is better).",
+        "Average Relevance Score": "Average score measuring how well responses address the specific query intent (higher is better).",
     }
 
     # Create individual charts
     retrieval_fig = create_plotly_bar_chart(
-        retrieval_labels,
-        retrieval_scores,
-        f"Retrieval Evaluation Metrics",
+        retrieval_labels,
+        retrieval_scores,
+        f"Retrieval Evaluation Metrics",
         alternate_colors=True,
-        descriptions=metric_descriptions
+        descriptions=metric_descriptions,
     )
 
     generation_basic_fig = create_plotly_bar_chart(
-        generation_basic_labels,
-        generation_basic_scores,
-        f"Basic Generation Metrics",
+        generation_basic_labels,
+        generation_basic_scores,
+        f"Basic Generation Metrics",
         percentage_scale=True,
-        descriptions=metric_descriptions
+        descriptions=metric_descriptions,
    )
 
     generation_quality_fig = create_plotly_bar_chart(
-        generation_quality_labels,
-        generation_quality_scores,
+        generation_quality_labels,
+        generation_quality_scores,
         f"Generation Quality Metrics",
-        descriptions=metric_descriptions
+        descriptions=metric_descriptions,
     )
 
     # Create summary metrics cards
     composite_quality = metrics_metadata.get("composite.overall_quality", 0)
-    retrieval_effectiveness = metrics_metadata.get("composite.retrieval_effectiveness", 0)
-
+    retrieval_effectiveness = metrics_metadata.get(
+        "composite.retrieval_effectiveness", 0
+    )
+
     # Combine into complete HTML report
     html = f"""
     <!DOCTYPE html>
@@ -388,7 +391,7 @@ def generate_evaluation_html(
     </body>
     </html>
     """
-
+
     return HTMLString(html)
 
 
@@ -434,10 +437,10 @@ def visualize_evaluation_results(
         + average_helpfulness_score
         + average_relevance_score
     ) / 3
 
     composite_retrieval_effectiveness = (
-        (1 - small_retrieval_eval_failure_rate/100)
-        + (1 - full_retrieval_eval_failure_rate/100)
+        (1 - small_retrieval_eval_failure_rate / 100)
+        + (1 - full_retrieval_eval_failure_rate / 100)
     ) / 2
 
     # Collect all metrics for dashboard and logging
6 changes: 3 additions & 3 deletions llm-complete-guide/steps/hf_dataset_loader.py
@@ -23,9 +23,9 @@
 
 
 @step(output_materializers=HFDatasetMaterializer)
-def load_hf_dataset() -> Tuple[
-    Annotated[Dataset, "train"], Annotated[Dataset, "test"]
-]:
+def load_hf_dataset() -> (
+    Tuple[Annotated[Dataset, "train"], Annotated[Dataset, "test"]]
+):
     train_dataset = load_dataset(DATASET_NAME_DEFAULT, split="train")
     test_dataset = load_dataset(DATASET_NAME_DEFAULT, split="test")
     return train_dataset, test_dataset
2 changes: 1 addition & 1 deletion llm-complete-guide/steps/url_scraper.py
@@ -50,7 +50,7 @@ def url_scraper(
         "https://docs.zenml.io/stack-components/step-operators/azureml",
         # "https://docs.zenml.io/how-to/interact-with-secrets",
         # "https://docs.zenml.io/how-to/infrastructure-deployment/auth-management/service-connectors-guide",
-        # "https://docs.zenml.io/how-to/infrastructure-deployment/auth-management/hyperai-service-connector",
+        # "https://docs.zenml.io/how-to/infrastructure-deployment/auth-management/hyperai-service-connector",
         # "https://docs.zenml.io/stack-components/data-validators/evidently",
         # "https://docs.zenml.io/stack-components/data-validators",
         # "https://docs.zenml.io/stack-components/step-operators/sagemaker",