From 729ac7ff3a8c4db8034ef3529fc7a8b3babbfc71 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Mon, 29 Sep 2025 23:41:10 +0000
Subject: [PATCH 1/3] minor updates to configs and files

---
 eureka_ml_insights/data_utils/aime_utils.py         |  6 ++++++
 eureka_ml_insights/data_utils/transform.py          |  2 ++
 eureka_ml_insights/models/models.py                 |  9 ++++++---
 eureka_ml_insights/user_configs/__init__.py         |  1 +
 eureka_ml_insights/user_configs/aime.py             | 13 ++++++-------
 eureka_ml_insights/user_configs/gpqa.py             |  9 +++------
 .../user_configs/vision_language/maze.py            | 13 ++++++-------
 main.py                                             |  8 +++++++-
 8 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/eureka_ml_insights/data_utils/aime_utils.py b/eureka_ml_insights/data_utils/aime_utils.py
index d36742cd..ec94f82a 100644
--- a/eureka_ml_insights/data_utils/aime_utils.py
+++ b/eureka_ml_insights/data_utils/aime_utils.py
@@ -26,6 +26,12 @@ def parse_output_answer(response):
         """
         numerical_value = None
 
+        if response is None:
+            return None
+
+        if isinstance(response, float):
+            return response
+
         # Try to find an answer in the "Final Answer: X" format
         match = re.search(r"Final Answer:\s*([\$]?-?[\d,]+(?:\.\d+)?%?)", response)
         # If not found, try to find an answer in the "Final Answer: [X]" format
diff --git a/eureka_ml_insights/data_utils/transform.py b/eureka_ml_insights/data_utils/transform.py
index aafcf296..662ad5a9 100644
--- a/eureka_ml_insights/data_utils/transform.py
+++ b/eureka_ml_insights/data_utils/transform.py
@@ -348,6 +348,8 @@ class RegexTransform(MultiColumnTransform):
     occurrence: str = "last"
 
     def _transform(self, sentence):
+        if sentence is None:
+            return None
         if self.ignore_case:
             results = re.findall(self.prompt_pattern, sentence, flags=re.IGNORECASE)
         else:
diff --git a/eureka_ml_insights/models/models.py b/eureka_ml_insights/models/models.py
index 82a57caa..8eaf5e46 100644
--- a/eureka_ml_insights/models/models.py
+++ b/eureka_ml_insights/models/models.py
@@ -13,11 +13,14 @@
 import anthropic
 import requests
 import tiktoken
-from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+from azure.identity import AzureCliCredential, DefaultAzureCredential, get_bearer_token_provider
 
 from eureka_ml_insights.secret_management import get_secret
 
 
+credential = DefaultAzureCredential()
+# credential = AzureCliCredential()  # alternative: reuse an existing Azure CLI login (e.g. `az login --identity` for Managed Identity)
+
 @dataclass
 class Model(ABC):
     """This class is used to define the structure of a model class.
@@ -374,7 +377,7 @@ def __post_init__(self):
                 "extra-parameters": "pass-through",
             }
         except ValueError:
-            self.bearer_token_provider = get_bearer_token_provider(DefaultAzureCredential(), self.auth_scope)
+            self.bearer_token_provider = get_bearer_token_provider(credential, self.auth_scope)
             self.headers = {
                 "Content-Type": "application/json",
                 "Authorization": ("Bearer " + self.bearer_token_provider()),
@@ -606,7 +609,7 @@ class AzureOpenAIClientMixIn:
     def get_client(self):
         from openai import AzureOpenAI
 
-        token_provider = get_bearer_token_provider(DefaultAzureCredential(), self.auth_scope)
+        token_provider = get_bearer_token_provider(credential, self.auth_scope)
         return AzureOpenAI(
             azure_endpoint=self.url,
             api_version=self.api_version,
diff --git a/eureka_ml_insights/user_configs/__init__.py b/eureka_ml_insights/user_configs/__init__.py
index ec419308..af196a47 100644
--- a/eureka_ml_insights/user_configs/__init__.py
+++ b/eureka_ml_insights/user_configs/__init__.py
@@ -86,6 +86,7 @@
     MAZE_PIPELINE,
     MAZE_REPORTING_PIPELINE,
     MAZE_TEXTONLY_PIPELINE,
+    MAZE_COT_TEXTONLY_PIPELINE,
 )
 from .vision_language.spatial_grid import (
     SPATIAL_GRID_PIPELINE,
diff --git a/eureka_ml_insights/user_configs/aime.py b/eureka_ml_insights/user_configs/aime.py
index 597d1f0b..8a8ca5ed 100644
--- a/eureka_ml_insights/user_configs/aime.py
+++ b/eureka_ml_insights/user_configs/aime.py
@@ -13,9 +13,6 @@
     PipelineConfig,
     PromptProcessingConfig,
 )
-from eureka_ml_insights.configs.model_configs import (
-    OAI_GPT4O_2024_11_20_CONFIG,
-)
 from eureka_ml_insights.core import DataProcessing, Inference, PromptProcessing
 from eureka_ml_insights.core.eval_reporting import EvalReporting
 from eureka_ml_insights.data_utils import (
@@ -52,8 +49,9 @@ class AIME_PIPELINE(ExperimentConfig):
     def configure_pipeline(
         self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]
     ) -> PipelineConfig:
-        self.n_repeats = int(kwargs.get('n_repeat', 1))  # Default value is 1
+        self.n_repeats = int(kwargs.get('n_repeats', 1))  # Default value is 1
         self.max_concurrent = int(kwargs.get('max_concurrent', 1))  # Default value is 1
+        eval_model_config = kwargs.get('eval_model_config', None)
         # data preprocessing
         self.data_processing_comp = PromptProcessingConfig(
             component_type=PromptProcessing,
@@ -441,7 +439,8 @@ def configure_pipeline(
         self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]
     ) -> PipelineConfig:
         pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from,**kwargs)
-        self.llm_extractor_max_concurrent = int(kwargs.get('llm_extractor_max_concurrent', 10))  # Default value is 1
+        self.llm_extractor_max_concurrent = int(kwargs.get('llm_extractor_max_concurrent', 8))  # Default value is 8
+        eval_model_config = kwargs.get('eval_model_config', None)
         answer_col = "extracted_answer"
         llm_extraction_subpipeline_conf = LLM_EXTRACTION_SUBPIPELINE_MIXIN()
         self.llm_extraction_subpipeline = llm_extraction_subpipeline_conf.configure_subpipeline(
@@ -451,7 +450,7 @@ def configure_pipeline(
                 os.path.dirname(__file__),
                 "../prompt_templates/aime_templates/extract_aime_answer.jinja",
             ),
-            llm_extractor_model_config=OAI_GPT4O_2024_11_20_CONFIG,
+            llm_extractor_model_config=eval_model_config,
             log_dir=self.log_dir,
             llm_extractor_max_concurrent=self.llm_extractor_max_concurrent,
             llm_extractor_answer_transforms=[
@@ -486,4 +485,4 @@ def configure_pipeline(
     ) -> PipelineConfig:
         pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from,**kwargs)
         self.data_processing_comp.data_reader_config.init_args["path"] = "lchen001/AIME2025"
-        return pipeline
\ No newline at end of file
+        return pipeline
diff --git a/eureka_ml_insights/user_configs/gpqa.py b/eureka_ml_insights/user_configs/gpqa.py
index b1761c07..942a03da 100644
--- a/eureka_ml_insights/user_configs/gpqa.py
+++ b/eureka_ml_insights/user_configs/gpqa.py
@@ -18,9 +18,6 @@
     PipelineConfig,
     PromptProcessingConfig,
 )
-from eureka_ml_insights.configs.model_configs import (
-    OAI_GPT4O_2024_11_20_CONFIG,
-)
 from eureka_ml_insights.core import (
     DataProcessing,
     EvalReporting,
@@ -99,7 +96,7 @@ def configure_pipeline(
             ),
             output_dir=os.path.join(self.log_dir, "inference_result"),
             resume_from=resume_from,
-            max_concurrent=1,
+            max_concurrent=32,
         )
         self.preeval_data_post_processing_comp = DataProcessingConfig(
             component_type=DataProcessing,
@@ -141,9 +138,9 @@ def configure_pipeline(
                 os.path.dirname(__file__),
                 "../prompt_templates/gpqa_templates/extract_gpqa_answer.jinja",
             ),
-            llm_extractor_model_config=OAI_GPT4O_2024_11_20_CONFIG,
+            llm_extractor_model_config=kwargs.get('eval_model_config', None),
             log_dir=self.log_dir,
-            llm_extractor_max_concurrent=1,
+            llm_extractor_max_concurrent=32,
             llm_extractor_answer_transforms=[
                 RegexTransform(
                     columns="model_output",
diff --git a/eureka_ml_insights/user_configs/vision_language/maze.py b/eureka_ml_insights/user_configs/vision_language/maze.py
index 1a08d94e..a560f22d 100644
--- a/eureka_ml_insights/user_configs/vision_language/maze.py
+++ b/eureka_ml_insights/user_configs/vision_language/maze.py
@@ -35,7 +35,6 @@
     PromptProcessingConfig,
     DataJoinConfig,    
 )
-from eureka_ml_insights.configs.model_configs import OAI_GPT4O_2024_11_20_CONFIG
 
 """This file contains example user defined configuration classes for the maze task.
 In order to define a new configuration, a new class must be created that directly or indirectly
@@ -141,13 +140,13 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None,
 
         self.inference_llm_answer_extract = InferenceConfig(
             component_type=Inference,
-            model_config=OAI_GPT4O_2024_11_20_CONFIG,
+            model_config=kwargs.get("eval_model_config", None),
             data_loader_config=DataSetConfig(
                 DataLoader,
                 {"path": os.path.join(self.filter_empty_answer.output_dir, "transformed_data.jsonl")},
             ),
             output_dir=os.path.join(self.log_dir, "llm_answer_extract_inference_result"),
-            max_concurrent=1
+            max_concurrent=64,
         )        
 
         self.data_join = DataJoinConfig(
@@ -456,8 +455,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None,
 class MAZE_TEXTONLY_PIPELINE(MAZE_PIPELINE):
     """This class extends MAZE_PIPELINE to use text only data."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        config = super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs) -> PipelineConfig:
+        config = super().configure_pipeline(model_config, resume_from, **kwargs)
         self.data_processing_comp.data_reader_config.init_args["tasks"] = (
             "maze_text_only"
         )
@@ -478,8 +477,8 @@ class MAZE_REPORTING_PIPELINE(MAZE_PIPELINE):
     """This method is used to define an eval pipeline with only a metric report component,
     on the maze dataset."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs) -> PipelineConfig:
+        super().configure_pipeline(model_config, resume_from, **kwargs)
         self.preeval_data_post_processing_comp.data_reader_config.init_args["path"] = resume_from
         # Configure the pipeline
         return PipelineConfig(
diff --git a/main.py b/main.py
index ccc9d174..5c964411 100755
--- a/main.py
+++ b/main.py
@@ -6,7 +6,10 @@
 import sys
 
 from eureka_ml_insights import user_configs as configs
-from eureka_ml_insights.configs import model_configs
+from eureka_ml_insights.configs.model_configs import (
+    OAI_GPT4O_2024_11_20_CONFIG,
+)
+from eureka_ml_insights.configs import model_configs as model_configs
 from eureka_ml_insights.core import Pipeline
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -106,6 +109,9 @@ def import_from_path(module_path, class_name):
             init_args["eval_model_config"] = getattr(model_configs, args.eval_model_config)
         except AttributeError:
             raise ValueError(f"Model config class {args.eval_model_config} not found.")
+    else:
+        logging.warning("No eval_model_config provided. Using OAI_GPT4O_2024_11_20_CONFIG for eval related LLM calls if needed.")
+        init_args["eval_model_config"] = OAI_GPT4O_2024_11_20_CONFIG
 
     if args.resume_from:
         init_args["resume_from"] = args.resume_from

From efe3d5b19b85912b8308cbbb753e78f6e85a3f0a Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Mon, 13 Oct 2025 21:28:16 +0000
Subject: [PATCH 2/3] package updates, code consistency updates

---
 environment.yml                               | 11 +++----
 .../data_utils/mathvision_utils.py            |  6 ++--
 eureka_ml_insights/user_configs/gpqa.py       | 10 ++++--
 .../image_understanding/spatial_reasoning.py  |  4 ++-
 eureka_ml_insights/user_configs/mathvision.py |  4 +--
 .../vision_language/spatial_grid.py           | 14 ++++++---
 .../vision_language/spatial_map.py            | 31 +++++++++++--------
 setup.py                                      | 10 +++---
 8 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/environment.yml b/environment.yml
index dbb1a604..5b20b6a7 100644
--- a/environment.yml
+++ b/environment.yml
@@ -37,7 +37,6 @@ dependencies:
   - xz=5.6.4=h5eee18b_1
   - zlib=1.2.13=h5eee18b_1
   - pip:
-      - accelerate==1.6.0
       - anthropic==0.49.0
       - azure-ai-textanalytics>=5.3.0
       - azure-core>=1.29.5
@@ -47,10 +46,10 @@ dependencies:
       - datasets>=3.2.0
       - fuzzywuzzy>=0.18.0
       - jsonlines>=2.0.0
-      - pandas>=2.2.1
       - pillow>=10.0.1
       - torch>=2.6.0
-      - numpy==1.26.4
+      - numpy>=2.2
+      - pandas>=2.2.1
       - tqdm>=4.65.0
       - jinja2>=3.1.3
       - transformers>=4.51.3
@@ -63,8 +62,8 @@ dependencies:
       - google-generativeai>=0.7.0
       - openai>=1.35.5
       - bitsandbytes>=0.42.0
+      - pycocotools>=2.0.10
+      - vllm==0.8.5
       - accelerate>=0.21.0
-      - pycocotools>=2.0.8
-      - vllm>=0.8.5
-      - latex2sympy2>=1.9.1
+      # - latex2sympy2_extended[antlr4_13_2] # optional for mathvision
 prefix: /home/sayouse/miniconda3/envs/myenv
\ No newline at end of file
diff --git a/eureka_ml_insights/data_utils/mathvision_utils.py b/eureka_ml_insights/data_utils/mathvision_utils.py
index 48e5df9f..b7967b85 100644
--- a/eureka_ml_insights/data_utils/mathvision_utils.py
+++ b/eureka_ml_insights/data_utils/mathvision_utils.py
@@ -1,7 +1,6 @@
 """Evaluates output of models for Math-V dataset; following https://github.com/mathllm/MATH-V/tree/main/evaluation"""
 
 from dataclasses import dataclass
-from latex2sympy2 import latex2sympy
 import pandas as pd
 import re
 
@@ -47,6 +46,8 @@ def eval_tuple(s):
     Note:
         This function relies on the latex2sympy function which is assumed to be defined elsewhere in the code.
     """
+    from latex2sympy2_extended import latex2sympy
+
     # Split the string by commas to get individual elements
     sl = s[1:-1].split(',')
     
@@ -89,7 +90,8 @@ def is_equal(asw: str, gt_asw: str) -> bool:
         bool: True if the answers are equivalent, otherwise False.
 
     """
-
+    from latex2sympy2_extended import latex2sympy
+    
     # return gt_asw == asw
 
     # Check for empty strings after removing spaces and return False if any of them is empty.
diff --git a/eureka_ml_insights/user_configs/gpqa.py b/eureka_ml_insights/user_configs/gpqa.py
index 949240f4..f12ef6e8 100644
--- a/eureka_ml_insights/user_configs/gpqa.py
+++ b/eureka_ml_insights/user_configs/gpqa.py
@@ -1,3 +1,4 @@
+import logging
 import os
 
 """This file contains user defined configuration classes for the GPQA dataset."""
@@ -474,7 +475,10 @@ def configure_pipeline(
     ) -> PipelineConfig:
         pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from, **kwargs)
         # data preprocessing
-        self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append(
-            MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 5)))
-        )
+        if "n_repeats" in kwargs and int(kwargs["n_repeats"]) != 5:
+            logging.warning(
+                f"n_repeats is set to {kwargs['n_repeats']} in kwargs, but will be overridden to 5 for GPQA_PIPELINE_5Run."
+            )
+        self.data_processing_comp.data_reader_config.init_args["transform"].transforms[-1] = MultiplyTransform(n_repeats=5)
+
         return pipeline
diff --git a/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py b/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py
index c2cbfa04..e439b8f7 100644
--- a/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py
+++ b/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py
@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 from eureka_ml_insights.configs.experiment_config import ExperimentConfig
 from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
@@ -11,6 +12,7 @@
     DataReader,
     PrependStringTransform,
     SequenceTransform,
+    SamplerTransform
 )
 from eureka_ml_insights.data_utils.spatial_utils import (
     LowerCaseNoPunctuationConvertNumbers,
@@ -50,7 +52,7 @@ class SPATIAL_REASONING_PAIRS_PIPELINE(ExperimentConfig):
     There is no model_config by default and the model config must be passed in via command lime.
     """
 
-    def configure_pipeline(self, model_config, resume_from=None):
+    def configure_pipeline(self, model_config, resume_from=None, **kwargs: dict[str, Any]):
         # Configure the data processing component.
         self.data_processing_comp = PromptProcessingConfig(
             component_type=PromptProcessing,
diff --git a/eureka_ml_insights/user_configs/mathvision.py b/eureka_ml_insights/user_configs/mathvision.py
index 83cdf0ca..10c0ba67 100644
--- a/eureka_ml_insights/user_configs/mathvision.py
+++ b/eureka_ml_insights/user_configs/mathvision.py
@@ -103,7 +103,7 @@ def configure_pipeline(
         # Eval Inference component round 1 (answer extraction).
         self.eval_inference_comp = InferenceConfig(
             component_type=Inference,
-            model_config=PERSONAL_GPT4O,
+            model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O),
             data_loader_config=DataSetConfig(
                 MMDataLoader,
                 {"path": os.path.join(self.eval_data_pre_processing.output_dir, "transformed_data.jsonl"), "load_images":False},
@@ -131,7 +131,7 @@ def configure_pipeline(
         # Eval Inference component round 2 (LLM scoring)
         self.eval_inference_comp_two = InferenceConfig(
             component_type=Inference,
-            model_config=PERSONAL_GPT4O,
+            model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O),
             data_loader_config=DataSetConfig(
                 MMDataLoader,
                 {"path": os.path.join(self.eval_data_pre_processing_two.output_dir, "transformed_data.jsonl"), "load_images":False},
diff --git a/eureka_ml_insights/user_configs/vision_language/spatial_grid.py b/eureka_ml_insights/user_configs/vision_language/spatial_grid.py
index c45d6d7e..278bd19e 100644
--- a/eureka_ml_insights/user_configs/vision_language/spatial_grid.py
+++ b/eureka_ml_insights/user_configs/vision_language/spatial_grid.py
@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 from eureka_ml_insights.configs.experiment_config import ExperimentConfig
 from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
@@ -11,6 +12,7 @@
     ExtractAnswerGrid,
     PrependStringTransform,
     SequenceTransform,
+    MultiplyTransform,
 )
 from eureka_ml_insights.metrics import CaseInsensitiveMatch, CountAggregator
 from eureka_ml_insights.configs import (
@@ -41,7 +43,7 @@ class SPATIAL_GRID_PIPELINE(ExperimentConfig):
     """This method is used to define an eval pipeline with inference and metric report components,
     on the grid counting dataset."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
         # Configure the data processing component.
         self.data_processing_comp = PromptProcessingConfig(
             component_type=PromptProcessing,
@@ -51,6 +53,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
                     "path": "microsoft/VISION_LANGUAGE",
                     "split": "val",
                     "tasks": "spatial_grid",
+                    "transform": MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))),
+
                 },
             ),
             output_dir=os.path.join(self.log_dir, "data_processing_output"),
@@ -113,8 +117,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
 class SPATIAL_GRID_TEXTONLY_PIPELINE(SPATIAL_GRID_PIPELINE):
     """This class extends SPATIAL_GRID_PIPELINE to use text only data."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        config = super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        config = super().configure_pipeline(model_config, resume_from, **kwargs)
         self.data_processing_comp.data_reader_config.init_args["tasks"] = (
             "spatial_grid_text_only"
         )
@@ -125,7 +129,7 @@ class SPATIAL_GRID_REPORTING_PIPELINE(SPATIAL_GRID_PIPELINE):
     """This method is used to define an eval pipeline with only a metric report component,
     on the grid counting dataset."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        super().configure_pipeline(model_config, resume_from, **kwargs)
         self.evalreporting_comp.data_reader_config.init_args["path"] = resume_from
         return PipelineConfig([self.evalreporting_comp], self.log_dir)
diff --git a/eureka_ml_insights/user_configs/vision_language/spatial_map.py b/eureka_ml_insights/user_configs/vision_language/spatial_map.py
index e2d3364e..1b075840 100644
--- a/eureka_ml_insights/user_configs/vision_language/spatial_map.py
+++ b/eureka_ml_insights/user_configs/vision_language/spatial_map.py
@@ -1,4 +1,5 @@
 import os
+from typing import Any
 
 from eureka_ml_insights.configs.experiment_config import ExperimentConfig
 from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing, DataProcessing, DataJoin
@@ -20,6 +21,7 @@
     MultiplyTransform,
     SequenceTransform,
     RegexTransform,
+    SamplerTransform
 )
 from eureka_ml_insights.metrics import SubstringExistsMatch, BiLevelAggregator, BiLevelCountAggregator, CountAggregator
 
@@ -54,7 +56,7 @@ class SPATIAL_MAP_PIPELINE(ExperimentConfig):
     """This method is used to define an eval pipeline with inference and metric report components,
     on the spatial map dataset."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
         # Configure the data processing component.
         self.data_processing_comp = PromptProcessingConfig(
             component_type=PromptProcessing,
@@ -64,7 +66,10 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
                     "path": "microsoft/VISION_LANGUAGE",
                     "split": "val_noinstruction",
                     "tasks": "spatial_map",
-                    "transform": MultiplyTransform(n_repeats=5),
+                    "transform": SequenceTransform([
+                        # SamplerTransform(sample_count=10, random_seed=1),
+                        MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))),
+                    ]),
                 },
             ),
             prompt_template_path=os.path.join(
@@ -86,7 +91,7 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
             ),
             output_dir=os.path.join(self.log_dir, "inference_result"),
             resume_from=resume_from,
-            max_concurrent=10,
+            max_concurrent=int(kwargs.get("max_concurrent", 10)),
         )
 
         self.preeval_data_post_processing_comp = DataProcessingConfig(
@@ -141,13 +146,13 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
 
         self.inference_llm_answer_extract = InferenceConfig(
             component_type=Inference,
-            model_config=OAI_GPT4O_2024_11_20_CONFIG,
+            model_config=kwargs.get("eval_model_config", OAI_GPT4O_2024_11_20_CONFIG),
             data_loader_config=DataSetConfig(
                 DataLoader,
                 {"path": os.path.join(self.filter_empty_answer.output_dir, "transformed_data.jsonl")},
             ),
             output_dir=os.path.join(self.log_dir, "llm_answer_extract_inference_result"),
-            max_concurrent=1
+            max_concurrent=10
         )        
 
         self.data_join = DataJoinConfig(
@@ -443,8 +448,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
 class SPATIAL_MAP_COT_PIPELINE(SPATIAL_MAP_PIPELINE):
     """This class extends SPATIAL_MAP_PIPELINE to use a COT prompt."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        config = super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        config = super().configure_pipeline(model_config, resume_from, **kwargs)
         self.data_processing_comp.prompt_template_path=os.path.join(
                 os.path.dirname(__file__),
                 "../../prompt_templates/vision_language_templates/cot.jinja",
@@ -454,8 +459,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
 class SPATIAL_MAP_TEXTONLY_PIPELINE(SPATIAL_MAP_PIPELINE):
     """This class extends SPATIAL_MAP_PIPELINE to use text only data."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        config = super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        config = super().configure_pipeline(model_config, resume_from, **kwargs)
         self.data_processing_comp.data_reader_config.init_args["tasks"] = (
             "spatial_map_text_only"
         )
@@ -464,8 +469,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
 class SPATIAL_MAP_COT_TEXTONLY_PIPELINE(SPATIAL_MAP_COT_PIPELINE):
     """This class extends SPATIAL_MAP_PIPELINE to use text only data."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        config = super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        config = super().configure_pipeline(model_config, resume_from, **kwargs)
         self.data_processing_comp.data_reader_config.init_args["tasks"] = (
             "spatial_map_text_only"
         )
@@ -476,8 +481,8 @@ class SPATIAL_MAP_REPORTING_PIPELINE(SPATIAL_MAP_PIPELINE):
     """This method is used to define an eval pipeline with only a metric report component,
     on the spatial map dataset."""
 
-    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
-        super().configure_pipeline(model_config, resume_from)
+    def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
+        super().configure_pipeline(model_config, resume_from, **kwargs)
         self.preeval_data_post_processing_comp.data_reader_config.init_args["path"] = resume_from
         # Configure the pipeline
         return PipelineConfig(
diff --git a/setup.py b/setup.py
index ebc9ac5f..f7f7f816 100644
--- a/setup.py
+++ b/setup.py
@@ -22,10 +22,10 @@
         'datasets>=3.2.0',
         'fuzzywuzzy>=0.18.0',
         'jsonlines>=2.0.0',
-        'pandas>=2.2.1',
         'pillow>=10.0.1',
         'torch>=2.6.0',
-        'numpy==1.26.4',
+        'numpy>=2.2',
+        'pandas>=2.2.1',
         'tqdm>=4.65.0',
         'jinja2>=3.1.3',
         'transformers>=4.51.3',
@@ -38,10 +38,10 @@
         'google-generativeai>=0.7.0',
         'openai>=1.35.5',
         'bitsandbytes>=0.42.0',
+        'pycocotools>=2.0.10',
+        'vllm==0.8.5',
         'accelerate>=0.21.0',
-        'pycocotools>=2.0.8',
-        'vllm>=0.8.5',
-        'latex2sympy2>=1.9.1',
+        # 'latex2sympy2_extended[antlr4_13_2]', # optional for mathvision
     ],
     extras_require={
         'llamacpp': [

From ce92f358eb50dcb6e6689f013b619cef4a451945 Mon Sep 17 00:00:00 2001
From: Vidhisha Balachandran <vidhishab@microsoft.com>
Date: Mon, 13 Oct 2025 22:27:54 +0000
Subject: [PATCH 3/3] bugfix

---
 eureka_ml_insights/user_configs/omni_math.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/eureka_ml_insights/user_configs/omni_math.py b/eureka_ml_insights/user_configs/omni_math.py
index 6dd64e97..a7d554df 100644
--- a/eureka_ml_insights/user_configs/omni_math.py
+++ b/eureka_ml_insights/user_configs/omni_math.py
@@ -63,8 +63,7 @@ def configure_pipeline(self, model_config=None, resume_from=None, eval_resume_fr
             data_loader_config=DataSetConfig(
                 DataLoader,
                 {
-                    "path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl"),
-                    "misc_columns": ["data_point_id","data_repeat_id"]
+                    "path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl")
                 },
             ),
             output_dir=os.path.join(self.log_dir, "inference_result"),