Skip to content

Commit f965777

Browse files
committed
Add comments for empty post processing component
1 parent bb5542d commit f965777

File tree

1 file changed: +6 −0 lines changed

eureka_ml_insights/user_configs/arc_agi.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,9 @@ def configure_pipeline(self, model_config=None, resume_from=None, **kwargs) -> P
90 90
)
91 91

92 92
# Configure the data post processing component.
93+
# We keep this component empty so that there is a place to insert a post-processing transform
94+
# to strip out the chain of thought from models that insert chains of thought using
95+
# <think> and </think> tags.
93 96
self.data_post_processing = DataProcessingConfig(
94 97
component_type=DataProcessing,
95 98
data_reader_config=DataSetConfig(
@@ -235,6 +238,9 @@ class COT_ARC_AGI_v1_PIPELINE(ARC_AGI_v1_PIPELINE):
235 238

236 239
def configure_pipeline(self, model_config=None, resume_from=None, **kwargs):
237 240
config = super().configure_pipeline(model_config=model_config, resume_from=resume_from)
241+
242+
# Here we modify the existing post processing component by inserting a post-processing
243+
# transform to strip out the chain of thought inserted between <think> and </think> tags.
238 244
self.data_post_processing.data_reader_config.init_args["transform"] = SequenceTransform(
239 245
[
240 246
ColumnRename(

Comments (0): 0 commit comments