From e60e2d37322543513da3c1efe68047cd088ab8d7 Mon Sep 17 00:00:00 2001
From: marwan37 <marwan.ext@zenml.io>
Date: Thu, 20 Mar 2025 07:39:24 -0500
Subject: [PATCH 1/4] add TODO comment to fix preprocesser typo

---
 .typos.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.typos.toml b/.typos.toml
index 75fc8b0c..235d748e 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -18,8 +18,6 @@ MDEyOk9yZ2FuaXphdGlvbjg4Njc2OTU1 = "MDEyOk9yZ2FuaXphdGlvbjg4Njc2OTU1"
 
 [default.extend-words]
 # Don't correct the surname "Teh"
-preprocesser = "preprocesser"
-Preprocesser = "Preprocesser"
 aks = "aks"
 hashi = "hashi"
 womens = "womens"
@@ -48,6 +46,9 @@ huggingface = "huggingface"
 answerdotai = "answerdotai"
 preprocessor = "preprocessor"
 logits = "logits"
+# TODO: Fix zenml library typo -- zenml.steps.preprocesser, Preprocesser
+preprocesser = "preprocesser"
+Preprocesser = "Preprocesser"
 
 [default]
 locale = "en-us"

From db62c178518339b96f9f07ef51db204ca6a65c6f Mon Sep 17 00:00:00 2001
From: marwan37 <marwan.ext@zenml.io>
Date: Sat, 22 Mar 2025 16:47:26 -0500
Subject: [PATCH 2/4] fix config references in classification pipeline

---
 research-radar/pipelines/classification.py | 41 ++++++++++------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/research-radar/pipelines/classification.py b/research-radar/pipelines/classification.py
index 743284c5..5366e302 100644
--- a/research-radar/pipelines/classification.py
+++ b/research-radar/pipelines/classification.py
@@ -15,9 +15,7 @@
 # limitations under the License.
 #
 
-"""
-Pipeline for article classification and dataset processing.
-"""
+"""Pipeline for article classification and dataset processing."""
 
 from typing import Dict, Optional
 
@@ -38,8 +36,7 @@
 
 @pipeline(enable_cache=False)
 def classification_pipeline(config: Optional[Dict] = None):
-    """
-    Pipeline for article classification and dataset processing.
+    """Pipeline for article classification and dataset processing.
 
     Args:
         config: Pipeline configuration from base_config.yaml
@@ -53,15 +50,15 @@ def classification_pipeline(config: Optional[Dict] = None):
 
     hf_token = get_hf_token()
 
-    pipeline_config = config.steps.classify
-    classification_type = pipeline_config.classification_type
+    classify_config = config["steps"]["classify"]
+    classification_type = classify_config["classification_type"]
 
     logger.log_classification_type(classification_type)
 
     dataset_path = (
-        config.datasets.unclassified
+        config["datasets"]["unclassified"]
         if classification_type == "augmentation"
-        else config.datasets.composite
+        else config["datasets"]["composite"]
     )
 
     articles = load_classification_dataset(dataset_path)
@@ -69,25 +66,25 @@ def classification_pipeline(config: Optional[Dict] = None):
     classifications = classify_articles(
         articles=articles,
         hf_token=hf_token,
-        model_id=config.model_repo_ids.deepseek,
-        inference_params=pipeline_config.inference_params,
+        model_id=config["model_repo_ids"]["deepseek"],
+        inference_params=classify_config["inference_params"],
         classification_type=classification_type,
-        batch_config=pipeline_config.batch_processing,
-        parallel_config=pipeline_config.parallel_processing,
-        checkpoint_config=pipeline_config.checkpoint,
+        batch_config=classify_config["batch_processing"],
+        parallel_config=classify_config["parallel_processing"],
+        checkpoint_config=classify_config["checkpoint"],
     )
 
     results_path = save_classifications(
         classifications=classifications,
         classification_type=classification_type,
-        model_id=config.model_repo_ids.deepseek,
-        inference_params=pipeline_config.inference_params,
-        batch_config=pipeline_config.batch_processing,
-        checkpoint_config=pipeline_config.checkpoint,
+        model_id=config["model_repo_ids"]["deepseek"],
+        inference_params=classify_config["inference_params"],
+        batch_config=classify_config["batch_processing"],
+        checkpoint_config=classify_config["checkpoint"],
     )
 
     if classification_type == "evaluation":
-        base_dataset_path = config.datasets.composite
+        base_dataset_path = config["datasets"]["composite"]
         calculate_and_save_metrics_from_json(
             results_path=str(results_path),
             base_dataset_path=base_dataset_path,
@@ -96,7 +93,7 @@ def classification_pipeline(config: Optional[Dict] = None):
     if classification_type == "augmentation":
         merge_classifications(
             results_path=results_path,
-            training_dataset_path=config.datasets.composite,
-            augmented_dataset_path=config.datasets.augmented,
-            source_dataset_path=config.datasets.unclassified,
+            training_dataset_path=config["datasets"]["composite"],
+            augmented_dataset_path=config["datasets"]["augmented"],
+            source_dataset_path=config["datasets"]["unclassified"],
         )

From b580f514e8082f734e443d22dd740980afec99ca Mon Sep 17 00:00:00 2001
From: marwan37 <marwan.ext@zenml.io>
Date: Sat, 22 Mar 2025 16:58:05 -0500
Subject: [PATCH 3/4] undo addition of preprocesser to .typos.toml

---
 .typos.toml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/.typos.toml b/.typos.toml
index 235d748e..4e44f644 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -44,11 +44,7 @@ modernbert = "modernbert"
 deepseek = "deepseek"
 huggingface = "huggingface"
 answerdotai = "answerdotai"
-preprocessor = "preprocessor"
 logits = "logits"
-# TODO: Fix zenml library typo -- zenml.steps.preprocesser, Preprocesser
-preprocesser = "preprocesser"
-Preprocesser = "Preprocesser"
 
 [default]
-locale = "en-us"
+locale = "en-us"
\ No newline at end of file

From 47e2ad284d4118b992d687387336f93865f9c60b Mon Sep 17 00:00:00 2001
From: marwan37 <marwan.ext@zenml.io>
Date: Sat, 22 Mar 2025 16:59:13 -0500
Subject: [PATCH 4/4] checkout main .typos.toml

---
 .typos.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.typos.toml b/.typos.toml
index 4e44f644..2fa346b5 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -18,6 +18,8 @@ MDEyOk9yZ2FuaXphdGlvbjg4Njc2OTU1 = "MDEyOk9yZ2FuaXphdGlvbjg4Njc2OTU1"
 
 [default.extend-words]
 # Don't correct the surname "Teh"
+preprocesser = "preprocesser"
+Preprocesser = "Preprocesser"
 aks = "aks"
 hashi = "hashi"
 womens = "womens"
@@ -44,6 +46,7 @@ modernbert = "modernbert"
 deepseek = "deepseek"
 huggingface = "huggingface"
 answerdotai = "answerdotai"
+preprocessor = "preprocessor"
 logits = "logits"
 
 [default]