Addressing comments

prabhuteja12 · prabhuteja12 · commit c5e1685f607a · 2026-02-11T10:34:03.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,8 +6,6 @@
 
 ### Tasks
 
-- Added `BalancedCOPA` benchmark task (balanced variant of COPA from `pkavumba/balanced-copa` on HuggingFace)
-
 ### Metrics
 
 ### General
diff --git a/src/eval_framework/tasks/benchmarks/balancedcopa.py b/src/eval_framework/tasks/benchmarks/balancedcopa.py
@@ -37,6 +37,8 @@ def _split_dataset_into_train_and_val(self, dataset: DatasetDict) -> DatasetDict
         # We split the train data into train and validation splits so that
         # the validation split matches the validation split of the original COPA dataset.
         # These magic numbers of the ids below were arrived at after manual inspection of the dataset.
+        # IDs 401-500 correspond to the validation split of the original COPA dataset.
+        # IDs 1401-1500 correspond to the mirrored version of that validation split.
         # The sanity of this version is maintained by the HF_REVISION above.
         dataset["validation"], dataset["train"] = split_dataset_by_id_ranges(
             dataset["train"], "id", [(401, 500), (1401, 1500)]