Skip to content

Commit a5fcc6f

Browse files
committed
Make think tag name a parameter and fix unused imports
1 parent f965777 commit a5fcc6f

File tree

5 files changed

+40
-60
lines changed

5 files changed

+40
-60
lines changed

eureka_ml_insights/data_utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
AddColumn,
2424
AddColumnAndData,
2525
ASTEvalTransform,
26-
CleanCOTAnswer,
2726
ColumnMatchMapTransform,
2827
ColumnRename,
2928
CopyColumn,

eureka_ml_insights/data_utils/arc_agi_utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import re
21
from dataclasses import dataclass
32

43
import pandas as pd
@@ -21,8 +20,8 @@ def parse_output_answer(response):
2120
Parse the input string to extract answer of a given ARCAGI question.
2221
Parameters:
2322
response (str): Input string containing answer X in the form of "<output>final answer string</output>".
24-
Returns:
25-
answer (str): The final answer string with leading and training spaces stripped.
23+
Returns:
24+
answer (str): The final answer string with leading and trailing spaces stripped.
2625
"""
2726
answer = ""
2827

eureka_ml_insights/data_utils/transform.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -549,11 +549,12 @@ def _extract_usage(self, row, usage_completion_read_col):
549549
@dataclass
550550
class CleanCOTAnswer(DFTransformBase):
551551
"""
552-
Transform to strip out anything before and including the </think> tag in the model response
552+
Transform to strip out anything before and including the </think_tag_name> tag in the model response
553553
"""
554554

555555
model_output_column: str
556556
model_answer_column: str
557+
think_tag_name: str = "think"
557558

558559
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
559560
df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
@@ -562,21 +563,22 @@ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
562563
def parse_output_answer(self, response):
    """Return *response* with any chain-of-thought prefix stripped out.

    The chain of thought is expected to be wrapped in opening/closing tags
    named by ``self.think_tag_name`` (default ``"think"``); everything up to
    and including the closing ``</{think_tag_name}>`` tag is removed.

    Parameters:
        response (str | None): Possibly null response string with chain of
            thought wrapped in the configured think tags.

    Returns:
        str: Response string with None replaced by a blank string and the
        chain of thought (if present) stripped out.
    """
    # NOTE(review): the committed version kept @staticmethod while the new
    # code reads self.think_tag_name, which raises NameError at runtime.
    # Converted to an instance method so the configured tag name is actually
    # used; the internal call site df[...].apply(self.parse_output_answer)
    # passes a single argument either way, so it is unaffected.
    if response is None:
        return ""

    closing_tag = f"</{self.think_tag_name}>"
    tag_index = response.find(closing_tag)
    if tag_index == -1:
        # No closing think tag present: return the response unchanged.
        return response

    # Drop everything up to and including the closing tag.
    return response[tag_index + len(closing_tag):]

eureka_ml_insights/user_configs/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
AIME_PIPELINE,
66
)
77
from .aime_seq import AIME_SEQ_PIPELINE
8-
from .arc_agi import (
9-
ARC_AGI_v1_PIPELINE,
10-
ARC_AGI_v1_PIPELINE_5Run,
11-
COT_ARC_AGI_v1_PIPELINE,
12-
COT_ARC_AGI_v1_PIPELINE_5Run,
13-
COT_ARC_AGI_v1_PIPELINE_5050_SUBSET,
14-
COT_ARC_AGI_v1_PIPELINE_5050_SUBSET_5Run,
15-
)
168
from .ba_calendar import (
179
BA_Calendar_Parallel_PIPELINE,
1810
BA_Calendar_PIPELINE,

eureka_ml_insights/user_configs/arc_agi.py

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,38 +4,27 @@
44
from eureka_ml_insights.core import Inference, PromptProcessing
55
from eureka_ml_insights.core.data_processing import DataProcessing
66
from eureka_ml_insights.core.eval_reporting import EvalReporting
7-
from eureka_ml_insights.data_utils.arc_agi_utils import (
8-
ARCAGI_ExtractAnswer
9-
)
7+
from eureka_ml_insights.data_utils.arc_agi_utils import ARCAGI_ExtractAnswer
108
from eureka_ml_insights.data_utils.data import (
119
DataLoader,
1210
DataReader,
1311
HFDataReader,
1412
)
15-
from eureka_ml_insights.metrics.metrics_base import ExactMatch
16-
from eureka_ml_insights.metrics.reports import (
17-
CountAggregator,
18-
AverageAggregator,
19-
BiLevelCountAggregator,
20-
BiLevelAggregator,
21-
CountAggregator
22-
)
23-
2413
from eureka_ml_insights.data_utils.transform import (
2514
AddColumn,
26-
AddColumnAndData,
2715
CleanCOTAnswer,
2816
ColumnRename,
2917
CopyColumn,
3018
ExtractUsageTransform,
31-
MajorityVoteTransform,
3219
MultiplyTransform,
3320
ReplaceStringsTransform,
34-
RunPythonTransform,
35-
SamplerTransform,
3621
SequenceTransform,
3722
)
38-
from eureka_ml_insights.metrics.ba_calendar_metrics import BACalendarMetric
23+
from eureka_ml_insights.metrics.metrics_base import ExactMatch
24+
from eureka_ml_insights.metrics.reports import (
25+
BiLevelAggregator,
26+
CountAggregator,
27+
)
3928

4029
from ..configs.config import (
4130
AggregatorConfig,
@@ -64,14 +53,14 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
6453
data_reader_config=DataSetConfig(
6554
HFDataReader,
6655
{
67-
"path": "pxferna/ARC-AGI-v1",
68-
"split": "test",
56+
"path": "pxferna/ARC-AGI-v1",
57+
"split": "test",
6958
"transform": SequenceTransform(
7059
[
7160
MultiplyTransform(n_repeats=1),
7261
]
7362
),
74-
}
63+
},
7564
),
7665
output_dir=os.path.join(self.log_dir, "data_processing_output"),
7766
)
@@ -100,9 +89,7 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
10089
{
10190
"path": os.path.join(self.inference_comp.output_dir, "inference_result.jsonl"),
10291
"format": ".jsonl",
103-
"transform": SequenceTransform(
104-
[]
105-
),
92+
"transform": SequenceTransform([]),
10693
},
10794
),
10895
output_dir=os.path.join(self.log_dir, "data_post_processing_output"),
@@ -144,14 +131,15 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
144131
},
145132
),
146133
AggregatorConfig(
147-
CountAggregator,
134+
CountAggregator,
148135
{
149136
"column_names": [
150137
"ExactMatch_result",
151138
],
152139
"normalize": True,
153140
"filename_base": "OverallMetrics_Total",
154-
}),
141+
},
142+
),
155143
],
156144
output_dir=os.path.join(self.log_dir, "eval_report"),
157145
)
@@ -165,14 +153,15 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
165153
"format": ".jsonl",
166154
"transform": SequenceTransform(
167155
[
168-
CopyColumn(
156+
CopyColumn(
169157
column_name_src="ExactMatch_result",
170158
column_name_dst="ExactMatch_result_numeric",
171159
),
172-
ReplaceStringsTransform(
160+
ReplaceStringsTransform(
173161
columns=["ExactMatch_result_numeric"],
174-
mapping={'incorrect': '0', 'correct': '1', 'none': 'NaN'},
175-
case=False)
162+
mapping={"incorrect": "0", "correct": "1", "none": "NaN"},
163+
case=False,
164+
),
176165
]
177166
),
178167
},
@@ -186,30 +175,29 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
186175
DataReader,
187176
{
188177
"path": os.path.join(self.posteval_data_post_processing_comp.output_dir, "transformed_data.jsonl"),
189-
"format": ".jsonl"
178+
"format": ".jsonl",
190179
},
191180
),
192181
aggregator_configs=[
193182
AggregatorConfig(
194-
BiLevelAggregator,
183+
BiLevelAggregator,
195184
{
196185
"column_names": [
197186
"ExactMatch_result_numeric",
198187
],
199188
"first_groupby": "data_point_id",
200189
"filename_base": "ExactMatch_Total_BestOfN",
201-
"agg_fn": "max"
202-
}),
190+
"agg_fn": "max",
191+
},
192+
),
203193
AggregatorConfig(
204194
BiLevelAggregator,
205195
{
206-
"column_names": [
207-
"ExactMatch_result_numeric"
208-
],
196+
"column_names": ["ExactMatch_result_numeric"],
209197
"first_groupby": "data_point_id",
210198
"second_groupby": "split",
211199
"filename_base": "ExactMatch_Grouped_by_Split_BestOfN",
212-
"agg_fn": "max"
200+
"agg_fn": "max",
213201
},
214202
),
215203
],
@@ -301,7 +289,7 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs):
301289
MultiplyTransform(n_repeats=1),
302290
]
303291
),
304-
}
292+
},
305293
)
306294

307295
return config

0 commit comments

Comments
 (0)