Remove dead code and fix best-of-n calculation

ilmarinen · ilmarinen · commit 9c58d14bfb3c · 2025-07-12T01:25:43.000-07:00
diff --git a/eureka_ml_insights/data_utils/__init__.py b/eureka_ml_insights/data_utils/__init__.py
@@ -23,6 +23,7 @@
     AddColumn,
     AddColumnAndData,
     ASTEvalTransform,
+    CleanCOTAnswer,
     ColumnMatchMapTransform,
     ColumnRename,
     CopyColumn,
diff --git a/eureka_ml_insights/data_utils/arc_agi_utils.py b/eureka_ml_insights/data_utils/arc_agi_utils.py
@@ -37,33 +37,3 @@ def parse_output_answer(response):
         answer = response[start_index:end_index].strip()
 
         return answer
-
-
-@dataclass
-class ARCAGI_CleanCOTAnswer(DFTransformBase):
-    model_output_column: str
-    model_answer_column: str
-
-    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
-        return df
-
-    @staticmethod
-    def parse_output_answer(response):
-        """
-        Replace None responses with an empty string
-        Parameters:
-            response (str): Possibly None Response string
-        Returns: 
-            answer (str): Response string with None replaced by blank string
-        """
-        if response is None:
-            return ""
-        
-        start_index = response.find("</think>") + len("</think>")
-        if start_index == -1:
-            return response
-        
-        response = response[start_index:]
-
-        return response
diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
@@ -544,3 +544,39 @@ def _extract_usage(self, row, usage_completion_read_col):
         if not pd.isna(row[self.usage_column]) and usage_completion_read_col in row[self.usage_column]:
             return row[self.usage_column][usage_completion_read_col]
         return np.nan
+
+
+@dataclass
+class CleanCOTAnswer(DFTransformBase):
+    """
+    Transform to strip out anything before and including the </think> tag in the model response
+    """
+
+    model_output_column: str
+    model_answer_column: str
+
+    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
+        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
+        return df
+
+    @staticmethod
+    def parse_output_answer(response):
+        """
+        Strip everything up to and including the first </think> tag from a model response
+        Parameters:
+            response (str): Response string, possibly None
+        Returns:
+            answer (str): Text after the first </think> tag; the unchanged response if the tag is absent; "" if response is None
+        """
+        if response is None:
+            return ""
+        
+        start_index = response.find("</think>")
+        if start_index == -1:
+            return response
+        
+        start_index = start_index + len("</think>")
+        
+        response = response[start_index:]
+
+        return response
diff --git a/eureka_ml_insights/user_configs/arc_agi.py b/eureka_ml_insights/user_configs/arc_agi.py