Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .github/workflows/code-formatting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GitHub Actions workflow: fails CI when scripts/format.sh would modify files,
# i.e. when a contributor has not run the project's formatter before pushing.
name: Code Formatting

on:
  # Run on PR creation and on every new push to an open PR.
  pull_request:
    types: [opened, synchronize]
  # Also run on direct pushes to the default branch.
  push:
    branches:
      - main

jobs:
  formatting-check:
    name: Code Formatting Check
    runs-on: ubuntu-latest
    # Skip draft pull requests.
    # NOTE(review): on `push` events `github.event.pull_request` is absent, so
    # this compares null == false — in Actions expressions that evaluates true
    # and the job still runs on pushes to main. Confirm that is the intent.
    if: github.event.pull_request.draft == false
    env:
      # Project-specific flags: enable debug output and opt out of telemetry
      # so CI runs stay quiet and deterministic.
      ZENML_DEBUG: 1
      ZENML_ANALYTICS_OPT_IN: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install ruff
        run: pip install ruff

      # format.sh rewrites files in place; a dirty working tree afterwards
      # means the repo was not formatted.
      - name: Run formatting script
        run: bash scripts/format.sh

      # `git diff --exit-code` exits non-zero when files changed; the `||`
      # converts that into a step output instead of failing this step, so the
      # failure message below can be emitted by a dedicated step.
      - name: Check for changes
        id: git-check
        run: |
          git diff --exit-code || echo "changes=true" >> $GITHUB_OUTPUT

      - name: Fail if changes were made
        if: steps.git-check.outputs.changes == 'true'
        run: |
          echo "::error::Formatting check failed. Please run 'scripts/format.sh' locally and commit the changes."
          exit 1
36 changes: 0 additions & 36 deletions .github/workflows/gpt4_summarizer.yml

This file was deleted.

60 changes: 0 additions & 60 deletions .github/workflows/production_run_complete_llm.yml

This file was deleted.

6 changes: 5 additions & 1 deletion .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Spell Checking
name: Pull Request Checks

on:
pull_request:
Expand All @@ -25,3 +25,7 @@ jobs:
markdown-link-check:
uses: ./.github/workflows/markdown-link-check.yml
if: github.event.pull_request.draft == false

code-formatting-check:
uses: ./.github/workflows/code-formatting.yml
if: github.event.pull_request.draft == false
68 changes: 0 additions & 68 deletions .github/workflows/staging_run_complete_llm.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,6 @@ finetuned-snowflake-arctic-embed-m-v1.5/

# ollama ignores
nohup.out

# Claude
.claude/
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,10 @@


@step(enable_cache=False)
def deployment_deploy() -> (
Annotated[
Optional[DatabricksDeploymentService],
ArtifactConfig(
name="databricks_deployment", is_deployment_artifact=True
),
]
):
def deployment_deploy() -> Annotated[
Optional[DatabricksDeploymentService],
ArtifactConfig(name="databricks_deployment", is_deployment_artifact=True),
]:
"""Predictions step.

This is an example of a predictions step that takes the data in and returns
Expand Down
6 changes: 2 additions & 4 deletions gamesense/steps/log_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# limitations under the License.
#

from typing import Any, Dict

from zenml import get_step_context, log_metadata, step

Expand All @@ -33,9 +32,8 @@ def log_metadata_from_step_artifact(
"""

context = get_step_context()
metadata_dict: Dict[str, Any] = context.pipeline_run.steps[
step_name
].outputs[artifact_name]
# Access the artifact metadata but don't store the unused variable
_ = context.pipeline_run.steps[step_name].outputs[artifact_name]

log_metadata(
artifact_name=artifact_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def deploy_to_huggingface(
save_model_to_deploy.entrypoint()

logger.info("Model saved locally. Pushing to HuggingFace...")
assert secret, "No secret found with name 'huggingface_creds'. Please create one with your `token`."
assert secret, (
"No secret found with name 'huggingface_creds'. Please create one with your `token`."
)

token = secret.secret_values["token"]
api = HfApi(token=token)
Expand Down
10 changes: 4 additions & 6 deletions huggingface-sagemaker/steps/promotion/promote_get_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@


@step
def promote_get_metrics() -> (
Tuple[
Annotated[Dict[str, Any], "latest_metrics"],
Annotated[Dict[str, Any], "current_metrics"],
]
):
def promote_get_metrics() -> Tuple[
Annotated[Dict[str, Any], "latest_metrics"],
Annotated[Dict[str, Any], "current_metrics"],
]:
"""Get metrics for comparison for promoting a model.

This is an example of a metric retrieval step. It is used to retrieve
Expand Down
12 changes: 6 additions & 6 deletions llm-complete-guide/steps/eval_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,9 @@ def perform_small_retrieval_evaluation(use_reranking: bool) -> float:


@step
def retrieval_evaluation_small() -> (
Annotated[float, "small_failure_rate_retrieval"]
):
def retrieval_evaluation_small() -> Annotated[
float, "small_failure_rate_retrieval"
]:
"""Executes the retrieval evaluation step without reranking.

Returns:
Expand All @@ -287,9 +287,9 @@ def retrieval_evaluation_small() -> (


@step
def retrieval_evaluation_small_with_reranking() -> (
Annotated[float, "small_failure_rate_retrieval_reranking"]
):
def retrieval_evaluation_small_with_reranking() -> Annotated[
float, "small_failure_rate_retrieval_reranking"
]:
"""Executes the retrieval evaluation step with reranking.

Returns:
Expand Down
6 changes: 3 additions & 3 deletions llm-complete-guide/steps/hf_dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@


@step(output_materializers=HFDatasetMaterializer)
def load_hf_dataset() -> (
Tuple[Annotated[Dataset, "train"], Annotated[Dataset, "test"]]
):
def load_hf_dataset() -> Tuple[
Annotated[Dataset, "train"], Annotated[Dataset, "test"]
]:
train_dataset = load_dataset(DATASET_NAME_DEFAULT, split="train")
test_dataset = load_dataset(DATASET_NAME_DEFAULT, split="test")
return train_dataset, test_dataset
1 change: 0 additions & 1 deletion llm-complete-guide/steps/populate_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def extract_docs_stats(
num_buckets = 10
bucket_size = (max_chunk_size - min_chunk_size) / num_buckets
buckets = [0] * num_buckets
bucket_ranges = []

for size in chunk_sizes:
bucket_index = min(
Expand Down
23 changes: 3 additions & 20 deletions llm-complete-guide/utils/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,26 +406,9 @@ def get_topn_similar_docs_elasticsearch(
"""
index_name = "zenml_docs"

if only_urls:
source = ["url"]
elif include_metadata:
source = ["content", "url", "parent_section"]
else:
source = ["content"]

query = {
"_source": source,
"query": {
"script_score": {
"query": {"match_all": {}},
"script": {
"source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
"params": {"query_vector": query_embedding},
},
}
},
"size": n,
}
# The source fields are determined for use in the results below
# based on what the caller requested
# but we don't need to store them in a variable since we're using direct knn search

# response = es_client.search(index=index_name, body=query)
response = es_client.search(
Expand Down
5 changes: 2 additions & 3 deletions magic-photobooth/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,8 @@ def inference_mode():
st.warning("No trained models available. Please train a model first.")
return

selected_model = st.selectbox(
"Choose a trained model", st.session_state.trained_models
)
# Model selection - value used in later operations
st.selectbox("Choose a trained model", st.session_state.trained_models)
selected_prompt = st.selectbox("Choose a prompt", paris_prompts)
custom_prompt = st.text_input("Or enter your own prompt")

Expand Down
Loading