Skip to content

Commit ebe3621

Browse files
committed
RAG working again
1 parent 42f952a commit ebe3621

File tree

6 files changed

8 additions and 20 deletions (+8 / -20 lines changed)

6 files changed

8 additions and 20 deletions (+8 / -20 lines changed)

llm-complete-guide/requirements-argilla.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@ litellm
55
ollama
66
polars
77
datasets
8-
git+https://github.com/argilla-io/argilla.git@releases/2.0.1#subdirectory=argilla # replace once released
8+
argilla
99
distilabel

llm-complete-guide/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
zenml[server]>=0.63.0
1+
zenml[server]>=0.67.0
22
langchain-community
33
ratelimit
44
langchain>=0.0.325
@@ -19,6 +19,7 @@ matplotlib
1919
pyarrow
2020
rerankers[all]
2121
datasets
22+
torch
2223

2324
# optional requirements for S3 artifact store
2425
# s3fs>2022.3.0

llm-complete-guide/run.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,6 @@
4242
from materializers.document_materializer import DocumentMaterializer
4343
from pipelines import (
4444
finetune_embeddings,
45-
generate_synthetic_data,
4645
llm_basic_rag,
4746
llm_eval,
4847
)
@@ -103,13 +102,6 @@
103102
default=False,
104103
help="Disable cache.",
105104
)
106-
@click.option(
107-
"--synthetic",
108-
"synthetic",
109-
is_flag=True,
110-
default=False,
111-
help="Run the synthetic data pipeline.",
112-
)
113105
@click.option(
114106
"--local",
115107
"local",
@@ -151,7 +143,6 @@ def main(
151143
query: Optional[str] = None,
152144
model: str = OPENAI_MODEL,
153145
no_cache: bool = False,
154-
synthetic: bool = False,
155146
local: bool = False,
156147
embeddings: bool = False,
157148
dummyembeddings: bool = False,
@@ -166,10 +157,11 @@ def main(
166157
query (Optional[str]): If provided, the RAG model will be queried with this string.
167158
model (str): The model to use for the completion. Default is OPENAI_MODEL.
168159
no_cache (bool): If `True`, cache will be disabled.
169-
synthetic (bool): If `True`, the synthetic data pipeline will be run.
170160
local (bool): If `True`, the local LLM via Ollama will be used.
161+
dummyembeddings (bool): If `True`, dummyembeddings will be used
171162
embeddings (bool): If `True`, the embeddings will be fine-tuned.
172163
argilla (bool): If `True`, the Argilla annotations will be used.
164+
reranked (bool): If `True`, rerankers will be used
173165
"""
174166
pipeline_args = {"enable_cache": not no_cache}
175167
embeddings_finetune_args = {
@@ -191,12 +183,11 @@ def main(
191183
md = Markdown(response)
192184
console.print(md)
193185

186+
print(f"Running Pipeline with pipeline args: {pipeline_args}")
194187
if rag:
195188
llm_basic_rag.with_options(**pipeline_args)()
196189
if evaluation:
197190
llm_eval.with_options(**pipeline_args)()
198-
if synthetic:
199-
generate_synthetic_data.with_options(**pipeline_args)()
200191
if embeddings:
201192
finetune_embeddings.with_options(**embeddings_finetune_args)()
202193
if dummyembeddings:

llm-complete-guide/steps/hf_dataset_loader.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,3 @@ def load_hf_dataset() -> (
2929
train_dataset = load_dataset(DATASET_NAME_DEFAULT, split="train")
3030
test_dataset = load_dataset(DATASET_NAME_DEFAULT, split="test")
3131
return train_dataset, test_dataset
32-
33-
34-
load_hf_dataset()

llm-complete-guide/steps/populate_index.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -95,7 +95,7 @@ def generate_embeddings(
9595
model = SentenceTransformer(EMBEDDINGS_MODEL)
9696

9797
log_artifact_metadata(
98-
artifact_name="embeddings",
98+
artifact_name="documents_with_embeddings",
9999
metadata={
100100
"embedding_type": EMBEDDINGS_MODEL,
101101
"embedding_dimensionality": EMBEDDING_DIMENSIONALITY,

llm-complete-guide/steps/url_scraper.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
from steps.url_scraping_utils import get_all_pages
2121

2222

23-
@step
23+
@step(enable_cache=True)
2424
def url_scraper(
2525
docs_url: str = "https://docs.zenml.io",
2626
repo_url: str = "https://github.com/zenml-io/zenml",
@@ -31,7 +31,6 @@ def url_scraper(
3131
Args:
3232
docs_url: URL to the documentation.
3333
repo_url: URL to the repository.
34-
release_notes_url: URL to the release notes.
3534
website_url: URL to the website.
3635
3736
Returns:

0 commit comments

Comments
 (0)