Skip to content

Commit a5c8be6

Browse files
committed
Merge branch 'misc/add-deployment-llm-complete' of https://github.com/zenml-io/zenml-projects into misc/add-deployment-llm-complete
2 parents 1d93385 + 7bf314e commit a5c8be6

File tree

10 files changed

+47
-30
lines changed

10 files changed

+47
-30
lines changed

llm-complete-guide/steps/chunk_documents.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
)
2222
from structures import Document
2323
from utils.llm_utils import split_documents
24-
from zenml import log_artifact_metadata, step
24+
from zenml import log_metadata, step
2525
from zenml.logger import get_logger
2626

2727
logger = get_logger(__name__)
@@ -137,8 +137,9 @@ def chunk_documents(
137137
logger.info(
138138
f"Number of documents after chunking: {num_docs_after_chunking}"
139139
)
140-
log_artifact_metadata(
140+
log_metadata(
141141
artifact_name="chunked_documents",
142+
infer_artifact=True,
142143
metadata={
143144
"before_chunking_count": num_docs_before_chunking,
144145
"after_chunking_count": num_docs_after_chunking,

llm-complete-guide/steps/eval_pii.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import matplotlib.pyplot as plt
77
from datasets import Dataset
88
from PIL import Image
9-
from zenml import log_artifact_metadata, step
9+
from zenml import log_metadata, step
1010

1111

1212
class PIIDetector:
@@ -305,8 +305,9 @@ def eval_pii(
305305
"dates_found": train_results["statistics"]["total_findings"]["dates"],
306306
"ips_found": train_results["statistics"]["total_findings"]["ips"],
307307
}
308-
log_artifact_metadata(
309-
metadata=train_metadata, artifact_name="train_pii_results"
308+
log_metadata(
309+
metadata=train_metadata, artifact_name="train_pii_results",
310+
infer_artifact=True
310311
)
311312

312313
test_metadata = {
@@ -320,8 +321,9 @@ def eval_pii(
320321
"dates_found": test_results["statistics"]["total_findings"]["dates"],
321322
"ips_found": test_results["statistics"]["total_findings"]["ips"],
322323
}
323-
log_artifact_metadata(
324-
metadata=test_metadata, artifact_name="test_pii_results"
324+
log_metadata(
325+
metadata=test_metadata, artifact_name="test_pii_results",
326+
infer_artifact=True
325327
)
326328

327329
pii_chart = plot_pii_results(train_results, test_results)

llm-complete-guide/steps/finetune_embeddings.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
)
4848
from sentence_transformers.training_args import BatchSamplers
4949
from sentence_transformers.util import cos_sim
50-
from zenml import ArtifactConfig, log_model_metadata, step
50+
from zenml import ArtifactConfig, log_metadata, step
5151
from zenml.client import Client
5252
from zenml.utils.cuda_utils import cleanup_gpu_memory
5353

@@ -168,8 +168,8 @@ def evaluate_base_model(
168168
for dim in EMBEDDINGS_MODEL_MATRYOSHKA_DIMS
169169
}
170170

171-
log_model_metadata(
172-
metadata={"base_model_eval": base_model_eval},
171+
log_metadata(
172+
metadata={"base_model_eval": base_model_eval}, infer_model=True
173173
)
174174

175175
return results
@@ -201,8 +201,8 @@ def evaluate_finetuned_model(
201201
for dim in EMBEDDINGS_MODEL_MATRYOSHKA_DIMS
202202
}
203203

204-
log_model_metadata(
205-
metadata={"finetuned_model_eval": finetuned_model_eval},
204+
log_metadata(
205+
metadata={"finetuned_model_eval": finetuned_model_eval}, infer_model=True
206206
)
207207

208208
return results
@@ -298,7 +298,8 @@ def finetune(
298298
token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"],
299299
)
300300

301-
log_model_metadata(
301+
log_metadata(
302+
infer_model=True,
302303
metadata={
303304
"training_params": {
304305
"num_train_epochs": epochs,

llm-complete-guide/steps/finetune_embeddings_legacy.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
from torch.nn import CosineSimilarity
2626
from torch.utils.data import DataLoader
2727
from utils.visualization_utils import create_comparison_chart
28-
from zenml import log_artifact_metadata, step
28+
from zenml import log_metadata, step
2929
from zenml.logger import get_logger
3030

3131
logger = get_logger(__name__)
@@ -79,12 +79,14 @@ def load_datasets(
7979
print("train_dataset_length_raw", len(train_dataset))
8080
print("test_dataset_length_raw", len(test_dataset))
8181

82-
log_artifact_metadata(
82+
log_metadata(
8383
artifact_name="train_dataset",
84+
infer_artifact=True,
8485
metadata={"row_count": len(train_dataset)},
8586
)
86-
log_artifact_metadata(
87+
log_metadata(
8788
artifact_name="test_dataset",
89+
infer_artifact=True,
8890
metadata={"row_count": len(test_dataset)},
8991
)
9092

@@ -187,8 +189,9 @@ def train_model(
187189
warmup_steps=warmup_steps,
188190
)
189191

190-
log_artifact_metadata(
192+
log_metadata(
191193
artifact_name="trained_model",
194+
infer_artifact=True,
192195
metadata={
193196
"model_path": model_path,
194197
"num_epochs": num_epochs,
@@ -280,8 +283,9 @@ def evaluate_model(
280283
finetuned_similarity=finetuned_avg_sim,
281284
)
282285

283-
log_artifact_metadata(
286+
log_metadata(
284287
artifact_name="evaluation_results",
288+
infer_artifact=True,
285289
metadata={
286290
"pretrained_average_similarity": {
287291
"value": pretrained_avg_sim,

llm-complete-guide/steps/generate_questions.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from rich import print
2222
from structures import Document
2323
from utils.openai_utils import get_openai_api_key
24-
from zenml import log_artifact_metadata, step
24+
from zenml import log_metadata, step
2525
from zenml.logger import get_logger
2626

2727
logger = get_logger(__name__)
@@ -160,8 +160,9 @@ def generate_questions(
160160
f"Generated {len(final_df)} questions for {len(documents)} documents."
161161
)
162162

163-
log_artifact_metadata(
163+
log_metadata(
164164
artifact_name="generated_questions",
165+
infer_artifact=True,
165166
metadata={
166167
"num_documents": len(documents),
167168
"num_questions_generated": len(final_df),

llm-complete-guide/steps/markdown_loader.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
import polars as pl
2020
from constants import FILES_TO_IGNORE
21-
from zenml import log_artifact_metadata, step
21+
from zenml import log_metadata, step
2222
from zenml.logger import get_logger
2323

2424
logger = get_logger(__name__)
@@ -61,8 +61,9 @@ def load_markdown_files(
6161
f"Subfolder '{subfolder}' not found in the cloned repository."
6262
)
6363

64-
log_artifact_metadata(
64+
log_metadata(
6565
artifact_name="markdown_files",
66+
infer_artifact=True,
6667
metadata={
6768
"num_markdown_files": len(markdown_files),
6869
"columns": "filename, page_content",

llm-complete-guide/steps/populate_index.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
from sentence_transformers import SentenceTransformer
4040
from structures import Document
4141
from utils.llm_utils import get_db_conn, get_es_client, split_documents
42-
from zenml import ArtifactConfig, log_artifact_metadata, step, log_model_metadata
42+
from zenml import ArtifactConfig, log_metadata, step, log_metadata
4343
from zenml.metadata.metadata_types import Uri
4444
from zenml.client import Client
4545
from constants import SECRET_NAME
@@ -515,8 +515,9 @@ def preprocess_documents(
515515
Exception: If an error occurs during preprocessing.
516516
"""
517517
try:
518-
log_artifact_metadata(
518+
log_metadata(
519519
artifact_name="split_chunks",
520+
infer_artifact=True,
520521
metadata={
521522
"chunk_size": CHUNK_SIZE,
522523
"chunk_overlap": CHUNK_OVERLAP,
@@ -536,8 +537,9 @@ def preprocess_documents(
536537
histogram_chart: Image.Image = create_histogram(stats)
537538
bar_chart: Image.Image = create_bar_chart(stats)
538539

539-
log_artifact_metadata(
540+
log_metadata(
540541
artifact_name="split_chunks",
542+
infer_artifact=True,
541543
metadata=stats,
542544
)
543545

@@ -568,8 +570,9 @@ def generate_embeddings(
568570
try:
569571
model = SentenceTransformer(EMBEDDINGS_MODEL)
570572

571-
log_artifact_metadata(
573+
log_metadata(
572574
artifact_name="documents_with_embeddings",
575+
infer_artifact=True,
573576
metadata={
574577
"embedding_type": EMBEDDINGS_MODEL,
575578
"embedding_dimensionality": EMBEDDING_DIMENSIONALITY,
@@ -828,7 +831,8 @@ def _log_metadata(index_type: IndexType) -> None:
828831
"dbname": "postgres",
829832
}
830833

831-
log_model_metadata(
834+
log_metadata(
835+
infer_model=True,
832836
metadata={
833837
"embeddings": {
834838
"model": EMBEDDINGS_MODEL,

llm-complete-guide/steps/url_scraper.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
import json
1717

1818
from typing_extensions import Annotated
19-
from zenml import ArtifactConfig, log_artifact_metadata, step
19+
from zenml import ArtifactConfig, log_metadata, step
2020

2121
from steps.url_scraping_utils import get_all_pages
2222

@@ -58,8 +58,9 @@ def url_scraper(
5858
# website_urls = get_all_pages(website_url)
5959
# all_urls = docs_urls + website_urls + examples_readme_urls
6060
all_urls = docs_urls
61-
log_artifact_metadata(
61+
log_metadata(
6262
artifact_name="urls",
63+
infer_artifact=True,
6364
metadata={
6465
"count": len(all_urls),
6566
},

llm-complete-guide/steps/visualize_chat.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
from typing import Any, Dict
2+
23
from typing_extensions import Annotated
34
from zenml import get_step_context, log_metadata, step
45
from zenml.metadata.metadata_types import Uri
56
from zenml.types import HTMLString
67
from zenml.utils.dashboard_utils import get_model_version_url
78

9+
810
@step(enable_cache=False)
911
def create_chat_interface(
1012
deployment_info: Dict[str, Any],

llm-complete-guide/utils/llm_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -441,7 +441,7 @@ def find_vectorstore_name() -> str:
441441
client = Client()
442442
model = client.get_model_version(ZENML_CHATBOT_MODEL, model_version_name_or_number_or_id="v0.68.1-dev")
443443

444-
return model.run_metadata["vector_store"].value["name"]
444+
return model.run_metadata["vector_store"]["name"]
445445

446446

447447
def rerank_documents(

0 commit comments

Comments
 (0)