Skip to content

Commit f6cc7e0

Browse files
committed
Add ARC AGI 5run pipeline
1 parent f02ad90 commit f6cc7e0

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

eureka_ml_insights/user_configs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .aime_seq import AIME_SEQ_PIPELINE
88
from .arc_agi import (
99
ARC_AGI_v1_PIPELINE,
10+
ARC_AGI_v1_PIPELINE_5Run,
1011
)
1112
from .ba_calendar import (
1213
BA_Calendar_Parallel_PIPELINE,

eureka_ml_insights/user_configs/arc_agi.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ExtractUsageTransform,
3030
MajorityVoteTransform,
3131
MultiplyTransform,
32+
ReplaceStringsTransform,
3233
RunPythonTransform,
3334
SamplerTransform,
3435
SequenceTransform,
@@ -64,6 +65,11 @@ def configure_pipeline(self, model_config=None, resume_from=None, resume_logdir=
6465
{
6566
"path": "pxferna/ARC-AGI-v1",
6667
"split": "test",
68+
"transform": SequenceTransform(
69+
[
70+
MultiplyTransform(n_repeats=1),
71+
]
72+
),
6773
}
6874
),
6975
output_dir=os.path.join(self.log_dir, "data_processing_output"),
@@ -135,12 +141,88 @@ def configure_pipeline(self, model_config=None, resume_from=None, resume_logdir=
135141
output_dir=os.path.join(self.log_dir, "eval_report"),
136142
)
137143

144+
self.posteval_data_post_processing_comp = DataProcessingConfig(
145+
component_type=DataProcessing,
146+
data_reader_config=DataSetConfig(
147+
DataReader,
148+
{
149+
"path": os.path.join(self.evalreporting_comp.output_dir, "metric_results.jsonl"),
150+
"format": ".jsonl",
151+
"transform": SequenceTransform(
152+
[
153+
CopyColumn(
154+
column_name_src="ExactMatch_result",
155+
column_name_dst="ExactMatch_result_numeric",
156+
),
157+
ReplaceStringsTransform(
158+
columns=["ExactMatch_result_numeric"],
159+
mapping={'incorrect': '0', 'correct': '1', 'none': 'NaN'},
160+
case=False)
161+
]
162+
),
163+
},
164+
),
165+
output_dir=os.path.join(self.log_dir, "posteval_data_post_processing_output"),
166+
)
167+
168+
self.best_of_n_evalreporting_comp = EvalReportingConfig(
169+
component_type=EvalReporting,
170+
data_reader_config=DataSetConfig(
171+
DataReader,
172+
{
173+
"path": os.path.join(self.posteval_data_post_processing_comp.output_dir, "transformed_data.jsonl"),
174+
"format": ".jsonl"
175+
},
176+
),
177+
aggregator_configs=[
178+
AggregatorConfig(
179+
BiLevelAggregator,
180+
{
181+
"column_names": [
182+
"ExactMatch_result_numeric",
183+
],
184+
"first_groupby": "uid",
185+
"filename_base": "ExactMatch_Total_BestOfN",
186+
}),
187+
# the first three reports aggregate results by data_point_id and take the best out of N
188+
AggregatorConfig(
189+
BiLevelAggregator,
190+
{
191+
"column_names": [
192+
"ExactMatch_result_numeric"
193+
],
194+
"first_groupby": "uid",
195+
"second_groupby": "split",
196+
"filename_base": "ExactMatch_Grouped_BestOfN",
197+
"agg_fn": "max"
198+
},
199+
),
200+
],
201+
output_dir=os.path.join(self.log_dir, "bestofn_eval_report"),
202+
)
203+
138204
# Configure the pipeline
139205
return PipelineConfig(
140206
[
141207
self.data_processing_comp,
142208
self.inference_comp,
143209
self.evalreporting_comp,
210+
self.posteval_data_post_processing_comp,
211+
self.best_of_n_evalreporting_comp,
144212
],
145213
self.log_dir,
146214
)
215+
216+
217+
class ARC_AGI_v1_PIPELINE_5Run(ARC_AGI_v1_PIPELINE):
    """Config for running the ARC AGI v1 benchmark with 5 repeated runs per data point.

    Extends ARC_AGI_v1_PIPELINE so the best-of-N aggregators in the parent
    pipeline (which group by "uid") have N=5 samples per data point to
    aggregate over.
    """

    def configure_pipeline(
        self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]
    ) -> PipelineConfig:
        """Build the parent pipeline, then repeat every data point 5 times.

        Args:
            model_config: Model configuration forwarded to the parent pipeline.
            resume_from: Optional path to resume a previous run from.
            **kwargs: Extra keyword arguments (accepted for interface
                compatibility; not forwarded to the parent).

        Returns:
            The PipelineConfig produced by the parent class; the data
            processing component it references is mutated in place.
        """
        pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from)
        # Append a 5x repeat to the parent's data-reader transform sequence.
        # NOTE(review): the parent already includes MultiplyTransform(n_repeats=1)
        # (a no-op repeat), so the effective repetition count is 5 — confirm
        # transforms compose this way rather than multiplying 1*5 per stage.
        self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append(
            MultiplyTransform(n_repeats=5)
        )
        return pipeline

0 commit comments

Comments
 (0)