From 58cb8900e10a628ec1e4fd6abfa0849d72f4470d Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 18:53:47 +0200
Subject: [PATCH 01/13] Format files with latest ruff

---
 .../steps/deployment/deployment_deploy.py          | 12 ++++--------
 .../steps/deploying/huggingface_deployment.py      |  4 +++-
 .../steps/promotion/promote_get_metrics.py         | 10 ++++------
 llm-complete-guide/steps/eval_retrieval.py         | 12 ++++++------
 llm-complete-guide/steps/hf_dataset_loader.py      |  6 +++---
 magic-photobooth/k8s_run.py                        | 12 +++++-------
 magic-photobooth/modal_run.py                      | 12 +++++-------
 magic-photobooth/modal_run_using_azure_data.py     | 12 +++++-------
 oncoclear/steps/model_promoter.py                  |  2 +-
 retail-forecast/pipelines/training_pipeline.py     | 14 ++++++--------
 .../steps/model_loader.py                          |  6 +++---
 .../steps/model_promoter.py                        |  2 +-
 .../steps/model_trainer.py                         |  8 ++++----
 13 files changed, 50 insertions(+), 62 deletions(-)

diff --git a/databricks-production-qa-demo/steps/deployment/deployment_deploy.py b/databricks-production-qa-demo/steps/deployment/deployment_deploy.py
index c8220afe9..b7407dcfb 100644
--- a/databricks-production-qa-demo/steps/deployment/deployment_deploy.py
+++ b/databricks-production-qa-demo/steps/deployment/deployment_deploy.py
@@ -31,14 +31,10 @@
 
 
 @step(enable_cache=False)
-def deployment_deploy() -> (
-    Annotated[
-        Optional[DatabricksDeploymentService],
-        ArtifactConfig(
-            name="databricks_deployment", is_deployment_artifact=True
-        ),
-    ]
-):
+def deployment_deploy() -> Annotated[
+    Optional[DatabricksDeploymentService],
+    ArtifactConfig(name="databricks_deployment", is_deployment_artifact=True),
+]:
     """Predictions step.
 
     This is an example of a predictions step that takes the data in and returns
diff --git a/huggingface-sagemaker/steps/deploying/huggingface_deployment.py b/huggingface-sagemaker/steps/deploying/huggingface_deployment.py
index 89d7305fe..33adcf81d 100644
--- a/huggingface-sagemaker/steps/deploying/huggingface_deployment.py
+++ b/huggingface-sagemaker/steps/deploying/huggingface_deployment.py
@@ -47,7 +47,9 @@ def deploy_to_huggingface(
     save_model_to_deploy.entrypoint()
 
     logger.info("Model saved locally. Pushing to HuggingFace...")
-    assert secret, "No secret found with name 'huggingface_creds'. Please create one with your `token`."
+    assert secret, (
+        "No secret found with name 'huggingface_creds'. Please create one with your `token`."
+    )
 
     token = secret.secret_values["token"]
     api = HfApi(token=token)
diff --git a/huggingface-sagemaker/steps/promotion/promote_get_metrics.py b/huggingface-sagemaker/steps/promotion/promote_get_metrics.py
index 93cebad1b..06473701c 100644
--- a/huggingface-sagemaker/steps/promotion/promote_get_metrics.py
+++ b/huggingface-sagemaker/steps/promotion/promote_get_metrics.py
@@ -27,12 +27,10 @@
 
 
 @step
-def promote_get_metrics() -> (
-    Tuple[
-        Annotated[Dict[str, Any], "latest_metrics"],
-        Annotated[Dict[str, Any], "current_metrics"],
-    ]
-):
+def promote_get_metrics() -> Tuple[
+    Annotated[Dict[str, Any], "latest_metrics"],
+    Annotated[Dict[str, Any], "current_metrics"],
+]:
     """Get metrics for comparison for promoting a model.
 
     This is an example of a metric retrieval step. It is used to retrieve
diff --git a/llm-complete-guide/steps/eval_retrieval.py b/llm-complete-guide/steps/eval_retrieval.py
index bf3594f8a..ebec42b50 100644
--- a/llm-complete-guide/steps/eval_retrieval.py
+++ b/llm-complete-guide/steps/eval_retrieval.py
@@ -275,9 +275,9 @@ def perform_small_retrieval_evaluation(use_reranking: bool) -> float:
 
 
 @step
-def retrieval_evaluation_small() -> (
-    Annotated[float, "small_failure_rate_retrieval"]
-):
+def retrieval_evaluation_small() -> Annotated[
+    float, "small_failure_rate_retrieval"
+]:
     """Executes the retrieval evaluation step without reranking.
 
     Returns:
@@ -287,9 +287,9 @@ def retrieval_evaluation_small() -> (
 
 
 @step
-def retrieval_evaluation_small_with_reranking() -> (
-    Annotated[float, "small_failure_rate_retrieval_reranking"]
-):
+def retrieval_evaluation_small_with_reranking() -> Annotated[
+    float, "small_failure_rate_retrieval_reranking"
+]:
     """Executes the retrieval evaluation step with reranking.
 
     Returns:
diff --git a/llm-complete-guide/steps/hf_dataset_loader.py b/llm-complete-guide/steps/hf_dataset_loader.py
index 5615ba4a4..0c7777573 100644
--- a/llm-complete-guide/steps/hf_dataset_loader.py
+++ b/llm-complete-guide/steps/hf_dataset_loader.py
@@ -23,9 +23,9 @@
 
 
 @step(output_materializers=HFDatasetMaterializer)
-def load_hf_dataset() -> (
-    Tuple[Annotated[Dataset, "train"], Annotated[Dataset, "test"]]
-):
+def load_hf_dataset() -> Tuple[
+    Annotated[Dataset, "train"], Annotated[Dataset, "test"]
+]:
     train_dataset = load_dataset(DATASET_NAME_DEFAULT, split="train")
     test_dataset = load_dataset(DATASET_NAME_DEFAULT, split="test")
     return train_dataset, test_dataset
diff --git a/magic-photobooth/k8s_run.py b/magic-photobooth/k8s_run.py
index 5f19c702e..4e1d96399 100644
--- a/magic-photobooth/k8s_run.py
+++ b/magic-photobooth/k8s_run.py
@@ -326,13 +326,11 @@ def generate_video_frames(
     settings={"orchestrator.kubernetes": kubernetes_settings},
     enable_cache=False,
 )
-def image_to_video() -> (
-    Tuple[
-        Annotated[PILImage.Image, "generated_image"],
-        Annotated[bytes, "video_data"],
-        Annotated[HTMLString, "video_html"],
-    ]
-):
+def image_to_video() -> Tuple[
+    Annotated[PILImage.Image, "generated_image"],
+    Annotated[bytes, "video_data"],
+    Annotated[HTMLString, "video_html"],
+]:
     model_path = f"{TrainConfig().hf_username}/{TrainConfig().hf_repo_suffix}"
 
     pipe = AutoPipelineForText2Image.from_pretrained(
diff --git a/magic-photobooth/modal_run.py b/magic-photobooth/modal_run.py
index ee1409ff0..5cf49ced2 100644
--- a/magic-photobooth/modal_run.py
+++ b/magic-photobooth/modal_run.py
@@ -308,13 +308,11 @@ def generate_video_frames(
     settings={"step_operator.modal": modal_settings},
     enable_cache=False,
 )
-def image_to_video() -> (
-    Tuple[
-        Annotated[PILImage.Image, "generated_image"],
-        Annotated[bytes, "video_data"],
-        Annotated[HTMLString, "video_html"],
-    ]
-):
+def image_to_video() -> Tuple[
+    Annotated[PILImage.Image, "generated_image"],
+    Annotated[bytes, "video_data"],
+    Annotated[HTMLString, "video_html"],
+]:
     model_path = f"{TrainConfig().hf_username}/{TrainConfig().hf_repo_suffix}"
 
     pipe = AutoPipelineForText2Image.from_pretrained(
diff --git a/magic-photobooth/modal_run_using_azure_data.py b/magic-photobooth/modal_run_using_azure_data.py
index 1c91fc16f..561888ddf 100644
--- a/magic-photobooth/modal_run_using_azure_data.py
+++ b/magic-photobooth/modal_run_using_azure_data.py
@@ -300,13 +300,11 @@ def generate_video_frames(
     settings={"step_operator.modal": modal_settings},
     enable_cache=False,
 )
-def image_to_video() -> (
-    Tuple[
-        Annotated[PILImage.Image, "generated_image"],
-        Annotated[bytes, "video_data"],
-        Annotated[HTMLString, "video_html"],
-    ]
-):
+def image_to_video() -> Tuple[
+    Annotated[PILImage.Image, "generated_image"],
+    Annotated[bytes, "video_data"],
+    Annotated[HTMLString, "video_html"],
+]:
     model_path = f"{TrainConfig().hf_username}/{TrainConfig().hf_repo_suffix}"
 
     pipe = AutoPipelineForText2Image.from_pretrained(
diff --git a/oncoclear/steps/model_promoter.py b/oncoclear/steps/model_promoter.py
index ca73c472f..0c570488d 100644
--- a/oncoclear/steps/model_promoter.py
+++ b/oncoclear/steps/model_promoter.py
@@ -44,7 +44,7 @@ def model_promoter(accuracy: float, stage: str = "production") -> bool:
 
     if accuracy < 0.8:
         logger.info(
-            f"Model accuracy {accuracy*100:.2f}% is below 80% ! Not promoting model."
+            f"Model accuracy {accuracy * 100:.2f}% is below 80% ! Not promoting model."
         )
     else:
         logger.info(f"Model promoted to {stage}!")
diff --git a/retail-forecast/pipelines/training_pipeline.py b/retail-forecast/pipelines/training_pipeline.py
index 047948383..26a68f7dd 100644
--- a/retail-forecast/pipelines/training_pipeline.py
+++ b/retail-forecast/pipelines/training_pipeline.py
@@ -12,14 +12,12 @@
 
 
 @pipeline(name="retail_forecast_pipeline")
-def training_pipeline() -> (
-    Tuple[
-        Annotated[Dict[str, float], "model_metrics"],
-        Annotated[HTMLString, "evaluation_report"],
-        Annotated[HTMLString, "forecast_dashboard"],
-        Annotated[HTMLString, "sales_visualization"],
-    ]
-):
+def training_pipeline() -> Tuple[
+    Annotated[Dict[str, float], "model_metrics"],
+    Annotated[HTMLString, "evaluation_report"],
+    Annotated[HTMLString, "forecast_dashboard"],
+    Annotated[HTMLString, "sales_visualization"],
+]:
     """Simple retail forecasting pipeline using Prophet.
 
     Steps:
diff --git a/sign-language-detection-yolov5/steps/model_loader.py b/sign-language-detection-yolov5/steps/model_loader.py
index 1b8abffb4..f416539f5 100644
--- a/sign-language-detection-yolov5/steps/model_loader.py
+++ b/sign-language-detection-yolov5/steps/model_loader.py
@@ -21,9 +21,9 @@
 
 
 @step
-def model_loader() -> (
-    Tuple[Annotated[str, "model_path"], Annotated[torch.nn.Module, "model"]]
-):
+def model_loader() -> Tuple[
+    Annotated[str, "model_path"], Annotated[torch.nn.Module, "model"]
+]:
     """Loads the trained models from previous training pipeline runs."""
     training_pipeline = Client().get_pipeline(
         "sign_language_detection_train_pipeline"
diff --git a/vertex-registry-and-deployer/steps/model_promoter.py b/vertex-registry-and-deployer/steps/model_promoter.py
index b9a2abe41..d87ab3709 100644
--- a/vertex-registry-and-deployer/steps/model_promoter.py
+++ b/vertex-registry-and-deployer/steps/model_promoter.py
@@ -44,7 +44,7 @@ def model_promoter(accuracy: float, stage: str = "production") -> bool:
 
     if accuracy < 0.8:
         logger.info(
-            f"Model accuracy {accuracy*100:.2f}% is below 80% ! Not promoting model."
+            f"Model accuracy {accuracy * 100:.2f}% is below 80% ! Not promoting model."
         )
     else:
         logger.info(f"Model promoted to {stage}!")
diff --git a/vertex-registry-and-deployer/steps/model_trainer.py b/vertex-registry-and-deployer/steps/model_trainer.py
index eb4ed41e8..1a02e16f2 100644
--- a/vertex-registry-and-deployer/steps/model_trainer.py
+++ b/vertex-registry-and-deployer/steps/model_trainer.py
@@ -90,17 +90,17 @@ def model_trainer(
     # Evaluate the model
     trn_acc = model.score(X_trn, y_trn)
     tst_acc = model.score(X_tst, y_tst)
-    logger.info(f"Train accuracy={trn_acc*100:.2f}%")
-    logger.info(f"Test accuracy={tst_acc*100:.2f}%")
+    logger.info(f"Train accuracy={trn_acc * 100:.2f}%")
+    logger.info(f"Test accuracy={tst_acc * 100:.2f}%")
 
     messages = []
     if trn_acc < min_train_accuracy:
         messages.append(
-            f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}%!"
+            f"Train accuracy {trn_acc * 100:.2f}% is below {min_train_accuracy * 100:.2f}%!"
         )
     if tst_acc < min_test_accuracy:
         messages.append(
-            f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}%!"
+            f"Test accuracy {tst_acc * 100:.2f}% is below {min_test_accuracy * 100:.2f}%!"
         )
     else:
         for message in messages:

From a029351b9553f43baffea3b8616a30a3be6086cb Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 18:55:03 +0200
Subject: [PATCH 02/13] Add .claude/ to gitignore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 456f7e6e1..851a4f94b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,6 @@ finetuned-snowflake-arctic-embed-m-v1.5/
 
 # ollama ignores
 nohup.out
+
+# Claude
+.claude/

From 0a1791399127475e011ae8f8f56ee73b26a31fdc Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:04:04 +0200
Subject: [PATCH 03/13] Fix unused variables and add notebook exclusion to
 format script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix unused variables across multiple Python files
- Fix syntax error in zencoder/test_starcoder_bigcode.py
- Exclude Jupyter notebooks from linting checks in format.sh

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 gamesense/steps/log_metadata.py               |  6 ++--
 llm-complete-guide/steps/populate_index.py    |  1 -
 llm-complete-guide/utils/llm_utils.py         | 23 ++------------
 magic-photobooth/frontend.py                  |  5 ++--
 .../modal_run_using_azure_data.py             |  2 +-
 scripts/format.sh                             |  2 +-
 zencoder/pipelines/generate_code_dataset.py   |  2 +-
 zencoder/steps/deployment.py                  |  2 +-
 zencoder/test_starcoder_bigcode.py            | 30 +++++++++----------
 zencoder/test_zencoder.py                     |  2 +-
 zenml-support-agent/steps/agent_creator.py    |  3 +-
 11 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/gamesense/steps/log_metadata.py b/gamesense/steps/log_metadata.py
index 8398bccac..3c83a127a 100644
--- a/gamesense/steps/log_metadata.py
+++ b/gamesense/steps/log_metadata.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
 
-from typing import Any, Dict
 
 from zenml import get_step_context, log_metadata, step
 
@@ -33,9 +32,8 @@ def log_metadata_from_step_artifact(
     """
 
     context = get_step_context()
-    metadata_dict: Dict[str, Any] = context.pipeline_run.steps[
-        step_name
-    ].outputs[artifact_name]
+    # Perform the artifact lookup only; its result is intentionally discarded
+    _ = context.pipeline_run.steps[step_name].outputs[artifact_name]
 
     log_metadata(
         artifact_name=artifact_name,
diff --git a/llm-complete-guide/steps/populate_index.py b/llm-complete-guide/steps/populate_index.py
index c3d197caa..009a83e6d 100644
--- a/llm-complete-guide/steps/populate_index.py
+++ b/llm-complete-guide/steps/populate_index.py
@@ -117,7 +117,6 @@ def extract_docs_stats(
     num_buckets = 10
     bucket_size = (max_chunk_size - min_chunk_size) / num_buckets
     buckets = [0] * num_buckets
-    bucket_ranges = []
 
     for size in chunk_sizes:
         bucket_index = min(
diff --git a/llm-complete-guide/utils/llm_utils.py b/llm-complete-guide/utils/llm_utils.py
index ca9e776b3..cfcfeb9bb 100644
--- a/llm-complete-guide/utils/llm_utils.py
+++ b/llm-complete-guide/utils/llm_utils.py
@@ -406,26 +406,9 @@ def get_topn_similar_docs_elasticsearch(
     """
     index_name = "zenml_docs"
 
-    if only_urls:
-        source = ["url"]
-    elif include_metadata:
-        source = ["content", "url", "parent_section"]
-    else:
-        source = ["content"]
-
-    query = {
-        "_source": source,
-        "query": {
-            "script_score": {
-                "query": {"match_all": {}},
-                "script": {
-                    "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
-                    "params": {"query_vector": query_embedding},
-                },
-            }
-        },
-        "size": n,
-    }
+    # The source fields are determined for use in the results below
+    # based on what the caller requested
+    # but we don't need to store them in a variable since we're using direct knn search
 
     # response = es_client.search(index=index_name, body=query)
     response = es_client.search(
diff --git a/magic-photobooth/frontend.py b/magic-photobooth/frontend.py
index ffbb8730a..a369b70e6 100644
--- a/magic-photobooth/frontend.py
+++ b/magic-photobooth/frontend.py
@@ -178,9 +178,8 @@ def inference_mode():
         st.warning("No trained models available. Please train a model first.")
         return
 
-    selected_model = st.selectbox(
-        "Choose a trained model", st.session_state.trained_models
-    )
+    # Model selection widget - the selected value is currently not used below
+    st.selectbox("Choose a trained model", st.session_state.trained_models)
     selected_prompt = st.selectbox("Choose a prompt", paris_prompts)
     custom_prompt = st.text_input("Or enter your own prompt")
 
diff --git a/magic-photobooth/modal_run_using_azure_data.py b/magic-photobooth/modal_run_using_azure_data.py
index 561888ddf..ff53fba5b 100644
--- a/magic-photobooth/modal_run_using_azure_data.py
+++ b/magic-photobooth/modal_run_using_azure_data.py
@@ -360,7 +360,7 @@ def image_to_video() -> Tuple[
     enable_cache=False,
 )
 def dreambooth_pipeline():
-    data = load_data()
+    _ = load_data()
     # train_model(data, after="load_data")
     # batch_inference(after="train_model")
     # image_to_video(after="batch_inference")
diff --git a/scripts/format.sh b/scripts/format.sh
index c944f607c..a5b177a22 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -18,7 +18,7 @@ export ZENML_DEBUG=1
 export ZENML_ANALYTICS_OPT_IN=false
 
 # autoflake replacement: removes unused imports and variables
-ruff check $SRC --select F401,F841 --fix --exclude "__init__.py" --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --isolated
+ruff check $SRC --select F401,F841 --fix --exclude "__init__.py" --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --exclude "*.ipynb" --isolated
 
 # sorts imports
 ruff check $SRC --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --select I --fix --ignore D
diff --git a/zencoder/pipelines/generate_code_dataset.py b/zencoder/pipelines/generate_code_dataset.py
index 666230f08..800e3c467 100644
--- a/zencoder/pipelines/generate_code_dataset.py
+++ b/zencoder/pipelines/generate_code_dataset.py
@@ -33,4 +33,4 @@ def generate_code_dataset(dataset_id: str):
     # Link all the steps together by calling them and passing the output
     # of one step as the input of the next step.
     mirror_directory = mirror_repositories()
-    repo_id = prepare_dataset(mirror_directory, dataset_id)
+    prepare_dataset(mirror_directory, dataset_id)
diff --git a/zencoder/steps/deployment.py b/zencoder/steps/deployment.py
index 5db13ea9d..a62aaf1a4 100644
--- a/zencoder/steps/deployment.py
+++ b/zencoder/steps/deployment.py
@@ -38,7 +38,7 @@ def deploy_model_to_hf_hub(hf_endpoint_cfg: Optional[Dict] = None) -> None:
         hf_endpoint_cfg: The configuration for the Huggingface endpoint.
 
     """
-    endpoint_name = None
+    # Endpoint name is managed by the HuggingFace service
     hf_endpoint_cfg = HuggingFaceServiceConfig(**hf_endpoint_cfg)
 
     secret = Client().get_secret("huggingface_creds")
diff --git a/zencoder/test_starcoder_bigcode.py b/zencoder/test_starcoder_bigcode.py
index bcdd00ad5..39f3245a3 100644
--- a/zencoder/test_starcoder_bigcode.py
+++ b/zencoder/test_starcoder_bigcode.py
@@ -1,13 +1,13 @@
-# Write a zenml pipeline that loads sklearn iris dataset and builds a sklearn classifier 
+# Write a zenml pipeline that loads sklearn iris dataset and builds a sklearn classifier
 
 from zenml.pipelines import pipeline
-from zenml.steps.preprocesser import StandardPreprocesser
-from zenml.steps.split import RandomSplit
 from zenml.steps.evaluator import TFMAEvaluator
+from zenml.steps.preprocesser import StandardPreprocesser
+from zenml.steps.preprocesser.standard_preprocesser.standard_preprocesser import (
+    StandardPreprocesser,
+)
 from zenml.steps.trainer import TFFeed
-from zenml.steps.deployer import TFServingDeployer
-from zenml.steps.preprocesser.standard_preprocesser.standard_preprocesser import \
-    StandardPreprocesser
+
 
 @pipeline
 def tf_mnist_pipeline(epochs: int = 5, lr: float = 0.001):
@@ -15,16 +15,16 @@ def tf_mnist_pipeline(epochs: int = 5, lr: float = 0.001):
     # Link all the steps together by calling them and passing the output
     # of one step as the input
 
-#     x_train, x_test, y_train, y_test = RandomSplit(test_size=0.2)(
-#         dataset=iris_data_loader()
-#     )
+    # x_train, x_test, y_train, y_test = RandomSplit(test_size=0.2)(
+    #     dataset=iris_data_loader()
+    # )
     x_train, x_test, y_train, y_test = StandardPreprocesser(
         test_size=0.2,
         random_state=42,
-    )(
-        dataset=iris_data_loader()
-    )
-    model = TFFeed(epochs=epochs, lr=lr)(
-        x_train=x_train
+    )(dataset=iris_data_loader())
+    model = TFFeed(epochs=epochs, lr=lr)(x_train=x_train, y_train=y_train)
+
+    # Complete the pipeline with an evaluation step
+    metrics = TFMAEvaluator()(model=model, x_test=x_test, y_test=y_test)
 
-        
+    return model, metrics
diff --git a/zencoder/test_zencoder.py b/zencoder/test_zencoder.py
index 817032683..79733c1db 100644
--- a/zencoder/test_zencoder.py
+++ b/zencoder/test_zencoder.py
@@ -30,4 +30,4 @@ def trainer(df: pd.DataFrame) -> Any:
 @pipeline
 def sklearn_pipeline():
     df = importer()
-    model = trainer(df)
+    trainer(df)
diff --git a/zenml-support-agent/steps/agent_creator.py b/zenml-support-agent/steps/agent_creator.py
index 1e6324c99..52a69530d 100644
--- a/zenml-support-agent/steps/agent_creator.py
+++ b/zenml-support-agent/steps/agent_creator.py
@@ -77,7 +77,8 @@ def agent_creator(
         human_message=SUFFIX,
     )
 
-    agent_executor = AgentExecutor.from_agent_and_tools(
+    # Create but don't store the agent executor - just return the agent and tools
+    AgentExecutor.from_agent_and_tools(
         agent=my_agent,
         tools=tools,
         verbose=True,

From a02a30b25b7db667876b1a1b57b63d12c0b8217b Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:07:06 +0200
Subject: [PATCH 04/13] Remove unused CI workflows

---
 .github/workflows/gpt4_summarizer.yml         | 36 ----------
 .../workflows/production_run_complete_llm.yml | 60 ----------------
 .../workflows/staging_run_complete_llm.yml    | 68 -------------------
 3 files changed, 164 deletions(-)
 delete mode 100644 .github/workflows/gpt4_summarizer.yml
 delete mode 100644 .github/workflows/production_run_complete_llm.yml
 delete mode 100644 .github/workflows/staging_run_complete_llm.yml

diff --git a/.github/workflows/gpt4_summarizer.yml b/.github/workflows/gpt4_summarizer.yml
deleted file mode 100644
index 6288cf74c..000000000
--- a/.github/workflows/gpt4_summarizer.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-name: Summarize Supabase with GPT-4 and ZenML
-
-on:
-  push:
-    branches:
-      - project/new-summary-project
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    env:
-      ZENML_ANALYTICS_OPT_IN: false
-
-    steps:
-    - name: Checkout repository
-      uses: actions/checkout@v3
-
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.8
-
-    - name: Log into GCP
-      uses: 'google-github-actions/auth@v1'
-      with:
-        credentials_json: ${{ secrets.GCP_SA_KEY }}
-
-    - name: Run ZenML Pipeline
-      run: |
-        cd supabase-openai-summary/src
-        pip install -r requirements.txt
-        zenml init
-        zenml integration install gcp slack -y
-        zenml connect --url ${{ secrets.ZENML_URL }} --username ${{ secrets.ZENML_USERNAME }} --password ${{ secrets.ZENML_PASSWORD }}
-        zenml stack set ${{ secrets.ZENML_STACK }}
-        python run.py
diff --git a/.github/workflows/production_run_complete_llm.yml b/.github/workflows/production_run_complete_llm.yml
deleted file mode 100644
index 223c09858..000000000
--- a/.github/workflows/production_run_complete_llm.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: Production LLM-COMPLETE
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - 'llm-complete-guide/**'
-concurrency:
-  # New commit on branch cancels running workflows of the same branch
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  run-production-workflow:
-    runs-on: ubuntu-latest
-    if: github.event.pull_request.draft == false
-    env:
-      ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
-      ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
-      ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
-      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
-      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
-      ZENML_DEBUG: true
-      ZENML_ANALYTICS_OPT_IN: false
-      ZENML_LOGGING_VERBOSITY: INFO
-      ZENML_PROJECT_SECRET_NAME: llm-complete
-      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
-      ZENML_EVENT_SOURCE_ID: ae6ae536-d811-4838-a44b-744b768a0f31  # Set this to your preferred event source ID
-      ZENML_SERVICE_ACCOUNT_ID: fef76af2-382f-4ab2-9e6b-5eb85a303f0e  # Set this to your service account ID or delete
-
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.11'
-
-      - name: Install requirements
-        working-directory: ./llm-complete-guide
-        run: |
-          pip3 install uv
-          uv pip install -r requirements.txt --system
-          uv pip install -r requirements-argilla.txt --system
-          zenml integration install gcp -y --uv
-
-      - name: Connect to ZenML server
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml init
-
-      - name: Set stack (Production)
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml stack set ${{ env.ZENML_PRODUCTION_STACK }}
-
-      - name: Run pipeline, create pipeline, configure trigger (Production)
-        working-directory: ./llm-complete-guide
-        run: |
-          python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --config production/rag.yaml --zenml-model-version production
diff --git a/.github/workflows/staging_run_complete_llm.yml b/.github/workflows/staging_run_complete_llm.yml
deleted file mode 100644
index a1f831fa2..000000000
--- a/.github/workflows/staging_run_complete_llm.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: Staging Trigger LLM-COMPLETE
-on:
-  pull_request:
-    types: [opened, synchronize]
-    branches: [staging, main]
-concurrency:
-  # New commit on branch cancels running workflows of the same branch
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  run-staging-workflow:
-    runs-on: ubuntu-latest
-    if: github.event.pull_request.draft == false
-    env:
-      ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
-      ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
-      ZENML_STAGING_STACK : 67166d73-a44e-42f9-b67f-011e9afab9b5 # Set this to your staging stack ID
-      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
-      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
-      ZENML_DEBUG: true
-      ZENML_ANALYTICS_OPT_IN: false
-      ZENML_LOGGING_VERBOSITY: INFO
-      ZENML_PROJECT_SECRET_NAME: llm-complete
-      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
-
-    steps:
-      - name: Check out repository code
-        uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.11'
-
-      - name: Install requirements
-        working-directory: ./llm-complete-guide
-        run: |
-          pip3 install uv
-          uv pip install -r requirements.txt --system
-          uv pip install -r requirements-argilla.txt --system
-          zenml integration install aws s3 -y --uv
-
-      - name: Connect to ZenML server
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml init
-
-      - name: List and describe ZenML projects
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml project list || echo "Could not list projects"
-          zenml project describe || echo "Failed to describe project"
-
-      - name: Register Set ZenML project
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml project register llm-complete-guide || echo "Failed to register project"
-          zenml project set llm-complete-guide || echo "Failed to set project"
-
-      - name: Set stack (Staging)
-        working-directory: ./llm-complete-guide
-        run: |
-          zenml stack set ${{ env.ZENML_STAGING_STACK }}
-
-      - name: Run pipeline (Staging)
-        working-directory: ./llm-complete-guide
-        run: |
-          python gh_action_rag.py --no-cache --config staging/rag.yaml --zenml-model-version staging

From ec67513bd9d17c6358a5f11b45178f486030806b Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:09:49 +0200
Subject: [PATCH 05/13] Add code formatting CI workflow

- Created new GitHub Actions workflow 'code-formatting.yml' that runs the formatting script
- Workflow checks if code meets formatting standards and fails if any changes are needed
- Updated pull_request.yml to include the new formatting check
- Ensures PRs cannot be merged until code passes formatting checks
---
 .github/workflows/code-formatting.yml | 42 +++++++++++++++++++++++++++
 .github/workflows/pull_request.yml    |  6 +++-
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/code-formatting.yml

diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml
new file mode 100644
index 000000000..1e5b1070b
--- /dev/null
+++ b/.github/workflows/code-formatting.yml
@@ -0,0 +1,42 @@
+name: Code Formatting
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+  push:
+    branches:
+      - main
+
+jobs:
+  formatting-check:
+    name: Code Formatting Check
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false
+    env:
+      ZENML_DEBUG: 1
+      ZENML_ANALYTICS_OPT_IN: false
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+      
+      - name: Install ruff
+        run: pip install ruff
+      
+      - name: Run formatting script
+        run: bash scripts/format.sh
+        
+      - name: Check for changes
+        id: git-check
+        run: |
+          git diff --exit-code || echo "changes=true" >> $GITHUB_OUTPUT
+        
+      - name: Fail if changes were made
+        if: steps.git-check.outputs.changes == 'true'
+        run: |
+          echo "::error::Formatting check failed. Please run 'scripts/format.sh' locally and commit the changes."
+          exit 1
\ No newline at end of file
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index bad68db6f..15796f797 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -1,4 +1,4 @@
-name: Spell Checking
+name: Pull Request Checks
 
 on:
   pull_request:
@@ -25,3 +25,7 @@ jobs:
   markdown-link-check:
     uses: ./.github/workflows/markdown-link-check.yml
     if: github.event.pull_request.draft == false
+    
+  code-formatting-check:
+    uses: ./.github/workflows/code-formatting.yml
+    if: github.event.pull_request.draft == false

From 50c338a4494860b4872a23fc34665e259cc57f5e Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:15:36 +0200
Subject: [PATCH 06/13] Fix workflow_call trigger in code-formatting.yml

---
 .github/workflows/code-formatting.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml
index 1e5b1070b..f8e4a6c96 100644
--- a/.github/workflows/code-formatting.yml
+++ b/.github/workflows/code-formatting.yml
@@ -1,6 +1,7 @@
 name: Code Formatting
 
 on:
+  workflow_call:
   pull_request:
     types: [opened, synchronize]
   push:

From 9f2a4f30aea0b068caf1e566b5d6a35255e1c7eb Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <strickvl@users.noreply.github.com>
Date: Sat, 17 May 2025 19:16:09 +0200
Subject: [PATCH 07/13] Update llm-complete-guide/utils/llm_utils.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 llm-complete-guide/utils/llm_utils.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llm-complete-guide/utils/llm_utils.py b/llm-complete-guide/utils/llm_utils.py
index cfcfeb9bb..3d412af03 100644
--- a/llm-complete-guide/utils/llm_utils.py
+++ b/llm-complete-guide/utils/llm_utils.py
@@ -406,11 +406,6 @@ def get_topn_similar_docs_elasticsearch(
     """
     index_name = "zenml_docs"
 
-    # The source fields are determined for use in the results below
-    # based on what the caller requested
-    # but we don't need to store them in a variable since we're using direct knn search
-
-    # response = es_client.search(index=index_name, body=query)
     response = es_client.search(
         index=index_name,
         knn={

From 0ccb53e62de8ab92a6481640aaf1397209e98d48 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <strickvl@users.noreply.github.com>
Date: Sat, 17 May 2025 19:16:31 +0200
Subject: [PATCH 08/13] Update magic-photobooth/frontend.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 magic-photobooth/frontend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/magic-photobooth/frontend.py b/magic-photobooth/frontend.py
index a369b70e6..67a06dc9e 100644
--- a/magic-photobooth/frontend.py
+++ b/magic-photobooth/frontend.py
@@ -179,7 +179,7 @@ def inference_mode():
         return
 
     # Model selection - value used in later operations
-    st.selectbox("Choose a trained model", st.session_state.trained_models)
+    selected_model = st.selectbox("Choose a trained model", st.session_state.trained_models)
     selected_prompt = st.selectbox("Choose a prompt", paris_prompts)
     custom_prompt = st.text_input("Or enter your own prompt")
 

From e7d80c29e86360bd2732cc43349bce7ea53c99c8 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:17:57 +0200
Subject: [PATCH 09/13] small redundancy fix

---
 .github/workflows/code-formatting.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml
index f8e4a6c96..2b94f4874 100644
--- a/.github/workflows/code-formatting.yml
+++ b/.github/workflows/code-formatting.yml
@@ -2,8 +2,6 @@ name: Code Formatting
 
 on:
   workflow_call:
-  pull_request:
-    types: [opened, synchronize]
   push:
     branches:
       - main
@@ -40,4 +38,4 @@ jobs:
         if: steps.git-check.outputs.changes == 'true'
         run: |
           echo "::error::Formatting check failed. Please run 'scripts/format.sh' locally and commit the changes."
-          exit 1
\ No newline at end of file
+          exit 1

From c1e61f6dc53f97e0705198d5dccb80cceb10f040 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:22:29 +0200
Subject: [PATCH 10/13] Update CI to install latest ruff version and display
 version in logs

---
 .github/workflows/code-formatting.yml | 4 ++--
 magic-photobooth/frontend.py          | 4 +++-
 scripts/format.sh                     | 4 ++++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml
index 2b94f4874..4131e9115 100644
--- a/.github/workflows/code-formatting.yml
+++ b/.github/workflows/code-formatting.yml
@@ -23,8 +23,8 @@ jobs:
         with:
           python-version: '3.9'
       
-      - name: Install ruff
-        run: pip install ruff
+      - name: Install latest ruff
+        run: pip install --upgrade ruff
       
       - name: Run formatting script
         run: bash scripts/format.sh
diff --git a/magic-photobooth/frontend.py b/magic-photobooth/frontend.py
index 67a06dc9e..be2d7d9ac 100644
--- a/magic-photobooth/frontend.py
+++ b/magic-photobooth/frontend.py
@@ -179,7 +179,9 @@ def inference_mode():
         return
 
     # Model selection - value used in later operations
-    selected_model = st.selectbox("Choose a trained model", st.session_state.trained_models)
+    selected_model = st.selectbox(
+        "Choose a trained model", st.session_state.trained_models
+    )
     selected_prompt = st.selectbox("Choose a prompt", paris_prompts)
     custom_prompt = st.text_input("Or enter your own prompt")
 
diff --git a/scripts/format.sh b/scripts/format.sh
index a5b177a22..a764e2a29 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -17,6 +17,10 @@ fi
 export ZENML_DEBUG=1
 export ZENML_ANALYTICS_OPT_IN=false
 
+# Print ruff version for debugging
+echo "Using ruff version:"
+ruff --version
+
 # autoflake replacement: removes unused imports and variables
 ruff check $SRC --select F401,F841 --fix --exclude "__init__.py" --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --exclude "*.ipynb" --isolated
 

From 50ad613cd590f963a8675341a9c1391dde7876f0 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:25:39 +0200
Subject: [PATCH 11/13] Update README and CONTRIBUTING.md with formatting
 requirements

---
 CONTRIBUTING.md | 17 +++++++++++++++--
 README.md       |  9 +++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6ab572518..94b88803f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -99,10 +99,23 @@ the ["fork-and-pull" Git workflow](https://github.com/susam/gitpr)
 4. Checkout the **main** branch <- `git checkout main`.
 5. Create a branch locally off the **main** branch with a succinct but descriptive name.
 6. Commit changes to the branch.
-7. Push changes to your fork.
-8. Open a PR in our repository to the `main` branch and
+7. Format your code by running `bash scripts/format.sh`, then commit any changes it makes.
+8. Push changes to your fork.
+9. Open a PR in our repository to the `main` branch and
    follow the PR template so that we can efficiently review the changes.
 
+#### Code Formatting
+
+All code must pass our formatting checks before it can be merged. We use [ruff](https://github.com/astral-sh/ruff) for code formatting and linting.
+
+To format your code locally:
+```bash
+# Run from the project root
+bash scripts/format.sh
+```
+
+Our CI pipeline automatically checks if your code is properly formatted. If the check fails, you'll need to run the formatting script locally and commit the changes before your PR can be merged.
+
 ### 🚨 Reporting a Vulnerability
 
 If you think you have found a vulnerability, and even if you are not sure about it,
diff --git a/README.md b/README.md
index 0665a9584..5c5b033d8 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,15 @@ installation details.
 We welcome contributions from anyone to showcase your project built using ZenML.
 See our [contributing guide](./CONTRIBUTING.md) to start.
 
+## Code Quality
+
+All code contributions must pass our automated code quality checks:
+- **Code Formatting**: We use [ruff](https://github.com/astral-sh/ruff) for code formatting and linting
+- **Spelling**: We check for typos and spelling errors
+- **Markdown Links**: We verify that all links in documentation work properly
+
+Our CI pipeline will automatically check your PR for these issues. Remember to run `bash scripts/format.sh` locally before submitting your PR to ensure it passes the formatting checks.
+
 # 🆘 Getting Help
 
 By far the easiest and fastest way to get help is to:

From 08adf8d6e8517d5b753d9ab54151dd5aae1703d6 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:36:47 +0200
Subject: [PATCH 12/13] Add README projects check to CI

---
 .github/workflows/pull_request.yml          |   4 +
 .github/workflows/readme-projects-check.yml |  21 ++++
 scripts/check-readme-projects.sh            |   3 +
 scripts/check_readme_projects.py            | 130 ++++++++++++++++++++
 4 files changed, 158 insertions(+)
 create mode 100644 .github/workflows/readme-projects-check.yml
 create mode 100755 scripts/check-readme-projects.sh
 create mode 100644 scripts/check_readme_projects.py

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 15796f797..23e271a8a 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -29,3 +29,7 @@ jobs:
   code-formatting-check:
     uses: ./.github/workflows/code-formatting.yml
     if: github.event.pull_request.draft == false
+    
+  readme-projects-check:
+    uses: ./.github/workflows/readme-projects-check.yml
+    if: github.event.pull_request.draft == false
diff --git a/.github/workflows/readme-projects-check.yml b/.github/workflows/readme-projects-check.yml
new file mode 100644
index 000000000..d82e2f3a2
--- /dev/null
+++ b/.github/workflows/readme-projects-check.yml
@@ -0,0 +1,21 @@
+name: README Projects Check
+
+on:
+  workflow_call:
+
+jobs:
+  readme-projects-check:
+    name: Check Projects in README
+    runs-on: ubuntu-latest
+    if: github.event.pull_request.draft == false
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+      
+      - name: Run README projects check
+        run: python3 scripts/check_readme_projects.py
\ No newline at end of file
diff --git a/scripts/check-readme-projects.sh b/scripts/check-readme-projects.sh
new file mode 100755
index 000000000..847ab2139
--- /dev/null
+++ b/scripts/check-readme-projects.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Check if all project directories are listed in the README table
+python scripts/check_readme_projects.py
\ No newline at end of file
diff --git a/scripts/check_readme_projects.py b/scripts/check_readme_projects.py
new file mode 100644
index 000000000..77a8a5a27
--- /dev/null
+++ b/scripts/check_readme_projects.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+CI check to ensure all project directories are listed in the README table.
+
+This script checks for project directories (excluding certain directories like _assets,
+scripts, etc.) and ensures that they are all referenced in the project table in the
+main README.md file.
+
+Projects can be exempted from this check by adding them to the exempt_projects set
+in the get_project_directories function. This is useful for work-in-progress projects,
+internal projects, or projects that are not meant to be public-facing.
+"""
+
+import os
+import re
+import sys
+from pathlib import Path
+
+
+def get_project_directories(repo_root):
+    """Get a list of project directories from the repository.
+    
+    Args:
+        repo_root: The root directory of the repository.
+        
+    Returns:
+        List of project directory names.
+    """
+    # Directories to exclude (infrastructure, config, assets, etc.)
+    exclude_dirs = {
+        "_assets",
+        "scripts",
+        "assets",
+        ".git",
+        "__pycache__",
+        ".github",
+        "wandb",
+    }
+    
+    # Projects to exempt from README table requirement
+    # Add directories here that don't need to be in the README table
+    exempt_projects = {
+        # Work-in-progress or internal projects
+        "finscan",
+        "sonicscribe",
+    }
+    
+    project_dirs = []
+    
+    for item in os.listdir(repo_root):
+        item_path = os.path.join(repo_root, item)
+        
+        # Check if the item is a directory and not in the exclude or exempt lists
+        if (os.path.isdir(item_path) and 
+            item not in exclude_dirs and 
+            item not in exempt_projects and 
+            not item.startswith(".")):
+            # Skip directories that are Python package-related but not actual projects
+            if not item.startswith("__") and item != "venv" and item != "env":
+                project_dirs.append(item)
+    
+    return project_dirs
+
+
+def get_readme_projects(readme_path):
+    """Extract project directories listed in the README table.
+    
+    Args:
+        readme_path: Path to the README.md file.
+        
+    Returns:
+        List of project directory names referenced in the README.
+    """
+    with open(readme_path, "r") as f:
+        readme_content = f.read()
+    
+    # Find the project table
+    table_pattern = r"\| Project\s+\| Domain.*?\n(.*?)(?:\n\n|\n#)"
+    table_match = re.search(table_pattern, readme_content, re.DOTALL)
+    
+    if not table_match:
+        print("Error: Could not find project table in README.md")
+        return []
+    
+    table_content = table_match.group(1)
+    
+    # Extract project links from the table
+    # The pattern looks for Markdown links like [ProjectName](directory)
+    link_pattern = r"\[.*?\]\((.*?)\)"
+    project_links = re.findall(link_pattern, table_content)
+    
+    # Convert links to directory names
+    readme_projects = []
+    for link in project_links:
+        # Remove trailing slash if present
+        if link.endswith("/"):
+            link = link[:-1]
+        readme_projects.append(link)
+    
+    return readme_projects
+
+
+def main():
+    """Main function to run the check."""
+    # Get the repository root
+    repo_root = Path(__file__).parent.parent.absolute()
+    
+    # Get project directories from the repository (already excludes exempted projects)
+    project_dirs = get_project_directories(repo_root)
+    
+    # Get projects listed in the README
+    readme_path = os.path.join(repo_root, "README.md")
+    readme_projects = get_readme_projects(readme_path)
+    
+    # Find missing projects
+    missing_projects = set(project_dirs) - set(readme_projects)
+    
+    if missing_projects:
+        print("Error: The following project directories are not listed in the README table:")
+        for project in sorted(missing_projects):
+            print(f"  - {project}")
+        print("\nTo exempt a project from this check, add it to the exempt_projects set in this script.")
+        return 1
+    
+    print("Success: All required project directories are listed in the README table.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From 7776d1f04946f115ed4dcc87853612eeba5be393 Mon Sep 17 00:00:00 2001
From: Alex Strick van Linschoten <stricksubscriptions@fastmail.fm>
Date: Sat, 17 May 2025 19:37:52 +0200
Subject: [PATCH 13/13] Formatting

---
 scripts/check_readme_projects.py | 62 ++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/scripts/check_readme_projects.py b/scripts/check_readme_projects.py
index 77a8a5a27..1aacd8091 100644
--- a/scripts/check_readme_projects.py
+++ b/scripts/check_readme_projects.py
@@ -19,10 +19,10 @@
 
 def get_project_directories(repo_root):
     """Get a list of project directories from the repository.
-    
+
     Args:
         repo_root: The root directory of the repository.
-        
+
     Returns:
         List of project directory names.
     """
@@ -36,7 +36,7 @@ def get_project_directories(repo_root):
         ".github",
         "wandb",
     }
-    
+
     # Projects to exempt from README table requirement
     # Add directories here that don't need to be in the README table
     exempt_projects = {
@@ -44,51 +44,53 @@ def get_project_directories(repo_root):
         "finscan",
         "sonicscribe",
     }
-    
+
     project_dirs = []
-    
+
     for item in os.listdir(repo_root):
         item_path = os.path.join(repo_root, item)
-        
+
         # Check if the item is a directory and not in the exclude or exempt lists
-        if (os.path.isdir(item_path) and 
-            item not in exclude_dirs and 
-            item not in exempt_projects and 
-            not item.startswith(".")):
+        if (
+            os.path.isdir(item_path)
+            and item not in exclude_dirs
+            and item not in exempt_projects
+            and not item.startswith(".")
+        ):
             # Skip directories that are Python package-related but not actual projects
             if not item.startswith("__") and item != "venv" and item != "env":
                 project_dirs.append(item)
-    
+
     return project_dirs
 
 
 def get_readme_projects(readme_path):
     """Extract project directories listed in the README table.
-    
+
     Args:
         readme_path: Path to the README.md file.
-        
+
     Returns:
         List of project directory names referenced in the README.
     """
     with open(readme_path, "r") as f:
         readme_content = f.read()
-    
+
     # Find the project table
     table_pattern = r"\| Project\s+\| Domain.*?\n(.*?)(?:\n\n|\n#)"
     table_match = re.search(table_pattern, readme_content, re.DOTALL)
-    
+
     if not table_match:
         print("Error: Could not find project table in README.md")
         return []
-    
+
     table_content = table_match.group(1)
-    
+
     # Extract project links from the table
     # The pattern looks for Markdown links like [ProjectName](directory)
     link_pattern = r"\[.*?\]\((.*?)\)"
     project_links = re.findall(link_pattern, table_content)
-    
+
     # Convert links to directory names
     readme_projects = []
     for link in project_links:
@@ -96,7 +98,7 @@ def get_readme_projects(readme_path):
         if link.endswith("/"):
             link = link[:-1]
         readme_projects.append(link)
-    
+
     return readme_projects
 
 
@@ -104,25 +106,31 @@ def main():
     """Main function to run the check."""
     # Get the repository root
     repo_root = Path(__file__).parent.parent.absolute()
-    
+
     # Get project directories from the repository (already excludes exempted projects)
     project_dirs = get_project_directories(repo_root)
-    
+
     # Get projects listed in the README
     readme_path = os.path.join(repo_root, "README.md")
     readme_projects = get_readme_projects(readme_path)
-    
+
     # Find missing projects
     missing_projects = set(project_dirs) - set(readme_projects)
-    
+
     if missing_projects:
-        print("Error: The following project directories are not listed in the README table:")
+        print(
+            "Error: The following project directories are not listed in the README table:"
+        )
         for project in sorted(missing_projects):
             print(f"  - {project}")
-        print("\nTo exempt a project from this check, add it to the exempt_projects set in this script.")
+        print(
+            "\nTo exempt a project from this check, add it to the exempt_projects set in this script."
+        )
         return 1
-    
-    print("Success: All required project directories are listed in the README table.")
+
+    print(
+        "Success: All required project directories are listed in the README table."
+    )
     return 0