Skip to content

Commit 9c58d14

Browse files
committed
Remove dead code and fix best of n calculation
1 parent 727404e commit 9c58d14

File tree

4 files changed

+82
-336
lines changed

4 files changed

+82
-336
lines changed

eureka_ml_insights/data_utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
AddColumn,
2424
AddColumnAndData,
2525
ASTEvalTransform,
26+
CleanCOTAnswer,
2627
ColumnMatchMapTransform,
2728
ColumnRename,
2829
CopyColumn,

eureka_ml_insights/data_utils/arc_agi_utils.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -37,33 +37,3 @@ def parse_output_answer(response):
3737
answer = response[start_index:end_index].strip()
3838

3939
return answer
40-
41-
42-
@dataclass
class ARCAGI_CleanCOTAnswer(DFTransformBase):
    """
    Transform that strips the chain-of-thought prefix from a model response.

    Reads each value of ``model_output_column``, removes everything up to and
    including the first ``</think>`` tag, and writes the result to
    ``model_answer_column``.
    """

    model_output_column: str
    model_answer_column: str

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Populate ``model_answer_column`` with the cleaned model responses.

        Parameters:
            df (pd.DataFrame): Frame containing ``model_output_column``.
        Returns:
            df (pd.DataFrame): The same frame, with ``model_answer_column`` set.
        """
        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
        return df

    @staticmethod
    def parse_output_answer(response):
        """
        Drop everything up to and including the first ``</think>`` tag.

        Parameters:
            response (str): Possibly None response string
        Returns:
            answer (str): "" when response is None; the text following the
                first ``</think>`` tag when the tag is present; otherwise the
                response unchanged.
        """
        if response is None:
            return ""

        # Test for the tag *before* adding its length: str.find returns -1
        # when the tag is absent, and "-1 + len(tag)" would otherwise be taken
        # for a valid offset and chop the first characters off the response.
        _, tag, answer = response.partition("</think>")
        if not tag:
            return response

        return answer

eureka_ml_insights/data_utils/transform.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,39 @@ def _extract_usage(self, row, usage_completion_read_col):
544544
if not pd.isna(row[self.usage_column]) and usage_completion_read_col in row[self.usage_column]:
545545
return row[self.usage_column][usage_completion_read_col]
546546
return np.nan
547+
548+
549+
@dataclass
class CleanCOTAnswer(DFTransformBase):
    """
    Transform to strip out anything before and including the </think> tag in the model response
    """

    model_output_column: str
    model_answer_column: str

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Populate ``model_answer_column`` with the cleaned model responses.

        Parameters:
            df (pd.DataFrame): Frame containing ``model_output_column``.
        Returns:
            df (pd.DataFrame): The same frame, with ``model_answer_column`` set.
        """
        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
        return df

    @staticmethod
    def parse_output_answer(response):
        """
        Drop everything up to and including the first ``</think>`` tag.

        Parameters:
            response (str): Possibly missing (None / NaN / pd.NA) response string
        Returns:
            answer (str): "" when the response is missing; the text following
                the first ``</think>`` tag when the tag is present; otherwise
                the response unchanged. Non-string, non-missing values are
                returned untouched.
        """
        if response is None:
            return ""

        if not isinstance(response, str):
            # Missing values can reach this method as float NaN or pd.NA rather
            # than None; calling .find on those would raise AttributeError.
            is_missing = pd.api.types.is_scalar(response) and pd.isna(response)
            return "" if is_missing else response

        # partition yields an empty separator when the tag is absent, which
        # keeps the "no tag" case from being confused with a valid offset.
        _, tag, answer = response.partition("</think>")
        if not tag:
            return response

        return answer

0 commit comments

Comments
 (0)