
Commit 3440fbc

Commit message: formatting
1 parent e1b10ce · commit 3440fbc

File tree: 6 files changed, +111 -66 lines changed


llm-complete-guide/gh_action_rag.py

Lines changed: 29 additions & 32 deletions
@@ -21,11 +21,10 @@
 
 import click
 import yaml
+from pipelines.llm_basic_rag import llm_basic_rag
 from zenml.client import Client
 from zenml.exceptions import ZenKeyError
 
-from pipelines.llm_basic_rag import llm_basic_rag
-
 
 @click.command(
     help="""
@@ -39,7 +38,6 @@
     default=False,
     help="Disable cache.",
 )
-
 @click.option(
     "--create-template",
     "create_template",
@@ -51,26 +49,26 @@
     "--config",
     "config",
     default="rag_local_dev.yaml",
-    help="Specify a configuration file"
+    help="Specify a configuration file",
 )
 @click.option(
     "--service-account-id",
     "service_account_id",
     default=None,
-    help="Specify a service account ID"
+    help="Specify a service account ID",
 )
 @click.option(
     "--event-source-id",
     "event_source_id",
     default=None,
-    help="Specify an event source ID"
+    help="Specify an event source ID",
 )
 def main(
     no_cache: bool = False,
-    config: Optional[str]= "rag_local_dev.yaml",
+    config: Optional[str] = "rag_local_dev.yaml",
     create_template: bool = False,
     service_account_id: Optional[str] = None,
-    event_source_id: Optional[str] = None
+    event_source_id: Optional[str] = None,
 ):
     """
     Executes the pipeline to train a basic RAG model.
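For reference, a minimal sketch of how the Click entrypoint defined above could be exercised programmatically (for example in a smoke test) using click.testing.CliRunner. The import path gh_action_rag and the placeholder event-source UUID are assumptions for illustration only; a real invocation triggers an actual pipeline run on the configured ZenML stack.

# Hedged sketch: drive the CLI above with Click's test runner.
# `gh_action_rag` as the module path and the placeholder UUID are assumptions.
from click.testing import CliRunner

from gh_action_rag import main

runner = CliRunner()
result = runner.invoke(
    main,
    [
        "--create-template",
        "--config", "rag_local_dev.yaml",
        "--event-source-id", "00000000-0000-0000-0000-000000000000",
    ],
)
print(result.exit_code, result.output)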
@@ -86,43 +84,43 @@ def main(
     client = Client()
     config_path = Path(__file__).parent / "configs" / config
 
-    with (open(config_path,"r") as file):
+    with open(config_path, "r") as file:
         config = yaml.safe_load(file)
 
     if create_template:
-
         # run pipeline
         run = llm_basic_rag.with_options(
-            config_path=str(config_path),
-            enable_cache=not no_cache
+            config_path=str(config_path), enable_cache=not no_cache
         )()
         # create new run template
         rt = client.create_run_template(
             name=f"production-llm-complete-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
-            deployment_id=run.deployment_id
+            deployment_id=run.deployment_id,
         )
 
         try:
             # Check if an action has already been configured for this pipeline
             action = client.get_action(
                 name_id_or_prefix="LLM Complete (production)",
-                allow_name_prefix_match=True
+                allow_name_prefix_match=True,
             )
         except ZenKeyError:
             if not event_source_id:
-                raise RuntimeError("An event source is required for this workflow.")
+                raise RuntimeError(
+                    "An event source is required for this workflow."
+                )
 
             if not service_account_id:
                 service_account_id = client.create_service_account(
                     name="github-action-sa",
-                    description="To allow triggered pipelines to run with M2M authentication."
+                    description="To allow triggered pipelines to run with M2M authentication.",
                 ).id
 
             action_id = client.create_action(
                 name="LLM Complete (production)",
                 configuration={
                     "template_id": str(rt.id),
-                    "run_config": pop_restricted_configs(config)
+                    "run_config": pop_restricted_configs(config),
                 },
                 service_account_id=service_account_id,
                 auth_window=0,
@@ -132,7 +130,7 @@ def main(
                 event_source_id=UUID(event_source_id),
                 event_filter={"event_type": "tag_event"},
                 action_id=action_id,
-                description="Trigger pipeline to reindex everytime the docs are updated through git."
+                description="Trigger pipeline to reindex everytime the docs are updated through git.",
             )
         else:
             # update the action with the new template
@@ -141,14 +139,13 @@ def main(
                 name_id_or_prefix=action.id,
                 configuration={
                     "template_id": str(rt.id),
-                    "run_config": pop_restricted_configs(config)
-                }
+                    "run_config": pop_restricted_configs(config),
+                },
             )
 
     else:
         llm_basic_rag.with_options(
-            config_path=str(config_path),
-            enable_cache=not no_cache
+            config_path=str(config_path), enable_cache=not no_cache
         )()
 
 
@@ -162,22 +159,22 @@ def pop_restricted_configs(run_configuration: dict) -> dict:
         Modified dictionary with restricted items removed
     """
     # Pop top-level restricted items
-    run_configuration.pop('parameters', None)
-    run_configuration.pop('build', None)
-    run_configuration.pop('schedule', None)
+    run_configuration.pop("parameters", None)
+    run_configuration.pop("build", None)
+    run_configuration.pop("schedule", None)
 
     # Pop docker settings if they exist
-    if 'settings' in run_configuration:
-        run_configuration['settings'].pop('docker', None)
+    if "settings" in run_configuration:
+        run_configuration["settings"].pop("docker", None)
 
     # Pop docker settings from steps if they exist
-    if 'steps' in run_configuration:
-        for step in run_configuration['steps'].values():
-            if 'settings' in step:
-                step['settings'].pop('docker', None)
+    if "steps" in run_configuration:
+        for step in run_configuration["steps"].values():
+            if "settings" in step:
+                step["settings"].pop("docker", None)
 
     return run_configuration
 
 
 if __name__ == "__main__":
-    main()
+    main()
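The pop_restricted_configs helper above strips the fields that a triggered run must not override (parameters, build, schedule, and any Docker settings). A small, hedged illustration of its effect; the input keys below are made up for the example and do not come from the repository's config files:

# Hedged example of pop_restricted_configs; the input dict is illustrative only.
sample_config = {
    "parameters": {"docs_url": "https://docs.zenml.io"},
    "build": "some-build-id",
    "schedule": {"cron_expression": "0 0 * * *"},
    "settings": {
        "docker": {"requirements": ["langchain"]},
        "resources": {"cpu_count": 2},
    },
    "steps": {
        "url_scraper": {"settings": {"docker": {"requirements": ["bs4"]}}},
    },
}

cleaned = pop_restricted_configs(sample_config)
# cleaned == {
#     "settings": {"resources": {"cpu_count": 2}},
#     "steps": {"url_scraper": {"settings": {}}},
# }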

llm-complete-guide/pipelines/llm_basic_rag.py

Lines changed: 1 addition & 2 deletions
@@ -15,15 +15,14 @@
 # limitations under the License.
 #
 
-from zenml import pipeline
-
 from steps.populate_index import (
     generate_embeddings,
     index_generator,
     preprocess_documents,
 )
 from steps.url_scraper import url_scraper
 from steps.web_url_loader import web_url_loader
+from zenml import pipeline
 
 
 @pipeline

llm-complete-guide/pipelines/llm_eval.py

Lines changed: 4 additions & 10 deletions
@@ -13,12 +13,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
 from pathlib import Path
 from typing import Optional
 
 import click
-
 from steps.eval_e2e import e2e_evaluation, e2e_evaluation_llm_judged
 from steps.eval_retrieval import (
     retrieval_evaluation_full,
@@ -82,12 +80,9 @@ def llm_eval() -> None:
     "--config",
     "config",
     default="rag_local_dev.yaml",
-    help="Specify a configuration file"
+    help="Specify a configuration file",
 )
-def main(
-    no_cache: bool = False,
-    config: Optional[str] = "rag_eval.yaml"
-):
+def main(no_cache: bool = False, config: Optional[str] = "rag_eval.yaml"):
     """
     Executes the pipeline to train a basic RAG model.
 
@@ -98,10 +93,9 @@ def main(
     config_path = Path(__file__).parent.parent / "configs" / config
 
     llm_eval.with_options(
-        config_path=str(config_path),
-        enable_cache=not no_cache
+        config_path=str(config_path), enable_cache=not no_cache
     )()
 
 
 if __name__ == "__main__":
-    main()
+    main()
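Since main() above is only a thin CLI wrapper, the evaluation pipeline can also be launched directly. A hedged sketch, assuming the pipeline is importable as pipelines.llm_eval and that the config file lives under configs/ as in the diff:

# Hedged sketch: run the evaluation pipeline without the CLI wrapper.
# The import path and the config location are assumptions based on the diff.
from pathlib import Path

from pipelines.llm_eval import llm_eval

config_path = Path("configs") / "rag_eval.yaml"
llm_eval.with_options(config_path=str(config_path), enable_cache=False)()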

llm-complete-guide/steps/finetune_embeddings.py

Lines changed: 3 additions & 2 deletions
@@ -23,7 +23,8 @@
     DATASET_NAME_DISTILABEL,
     EMBEDDINGS_MODEL_ID_BASELINE,
     EMBEDDINGS_MODEL_ID_FINE_TUNED,
-    EMBEDDINGS_MODEL_MATRYOSHKA_DIMS, SECRET_NAME,
+    EMBEDDINGS_MODEL_MATRYOSHKA_DIMS,
+    SECRET_NAME,
 )
 from datasets import DatasetDict, concatenate_datasets, load_dataset
 from datasets.arrow_dataset import Dataset
@@ -294,7 +295,7 @@ def finetune(
     trainer.model.push_to_hub(
         f"zenml/{EMBEDDINGS_MODEL_ID_FINE_TUNED}",
         exist_ok=True,
-        token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"]
+        token=zenml_client.get_secret(SECRET_NAME).secret_values["hf_token"],
     )
 
     log_model_metadata(
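The push_to_hub call above reads the Hugging Face token from a ZenML secret rather than from an environment variable. A hedged sketch of registering and reading back such a secret with the ZenML client; the secret name and value below are placeholders, and using Client.create_secret for setup is an assumption, while the get_secret call mirrors the diff:

# Hedged sketch: store and read the Hugging Face token as a ZenML secret.
# SECRET_NAME is imported from the project's constants in the real code; the
# name and value below are placeholders for illustration only.
from zenml.client import Client

SECRET_NAME = "llm-complete"  # placeholder name

client = Client()
client.create_secret(name=SECRET_NAME, values={"hf_token": "<your-hf-token>"})

hf_token = client.get_secret(SECRET_NAME).secret_values["hf_token"]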
