Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ dependencies:
- xz=5.6.4=h5eee18b_1
- zlib=1.2.13=h5eee18b_1
- pip:
- accelerate==1.6.0
- anthropic==0.49.0
- azure-ai-textanalytics>=5.3.0
- azure-core>=1.29.5
Expand All @@ -47,10 +46,10 @@ dependencies:
- datasets>=3.2.0
- fuzzywuzzy>=0.18.0
- jsonlines>=2.0.0
- pandas>=2.2.1
- pillow>=10.0.1
- torch>=2.6.0
- numpy==1.26.4
- numpy>=2.2
- pandas>=2.2.1
- tqdm>=4.65.0
- jinja2>=3.1.3
- transformers>=4.51.3
Expand All @@ -63,8 +62,8 @@ dependencies:
- google-generativeai>=0.7.0
- openai>=1.35.5
- bitsandbytes>=0.42.0
- pycocotools>=2.0.10
- vllm==0.8.5
- accelerate>=0.21.0
- pycocotools>=2.0.8
- vllm>=0.8.5
- latex2sympy2>=1.9.1
# - latex2sympy2_extended[antlr4_13_2] # optional for mathvision
prefix: /home/sayouse/miniconda3/envs/myenv
6 changes: 4 additions & 2 deletions eureka_ml_insights/data_utils/mathvision_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Evaluates output of models for Math-V dataset; following https://github.com/mathllm/MATH-V/tree/main/evaluation"""

from dataclasses import dataclass
from latex2sympy2 import latex2sympy
import pandas as pd
import re

Expand Down Expand Up @@ -47,6 +46,8 @@ def eval_tuple(s):
Note:
This function relies on the latex2sympy function, which is imported inside the function body from latex2sympy2_extended.
"""
from latex2sympy2_extended import latex2sympy

# Split the string by commas to get individual elements
sl = s[1:-1].split(',')

Expand Down Expand Up @@ -89,7 +90,8 @@ def is_equal(asw: str, gt_asw: str) -> bool:
bool: True if the answers are equivalent, otherwise False.

"""

from latex2sympy2_extended import latex2sympy

# return gt_asw == asw

# Check for empty strings after removing spaces and return False if any of them is empty.
Expand Down
10 changes: 7 additions & 3 deletions eureka_ml_insights/user_configs/gpqa.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os

"""This file contains user defined configuration classes for the GPQA dataset."""
Expand Down Expand Up @@ -474,7 +475,10 @@ def configure_pipeline(
) -> PipelineConfig:
pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from, **kwargs)
# data preprocessing
self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append(
MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 5)))
)
if "n_repeats" in kwargs and int(kwargs["n_repeats"]) != 5:
logging.warning(
f"n_repeats is set to {kwargs['n_repeats']} in kwargs, but will be overridden to 5 for GPQA_PIPELINE_5Run."
)
self.data_processing_comp.data_reader_config.init_args["transform"].transforms[-1] = MultiplyTransform(n_repeats=5)

return pipeline
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import Any

from eureka_ml_insights.configs.experiment_config import ExperimentConfig
from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
Expand All @@ -11,6 +12,7 @@
DataReader,
PrependStringTransform,
SequenceTransform,
SamplerTransform
)
from eureka_ml_insights.data_utils.spatial_utils import (
LowerCaseNoPunctuationConvertNumbers,
Expand Down Expand Up @@ -50,7 +52,7 @@ class SPATIAL_REASONING_PAIRS_PIPELINE(ExperimentConfig):
There is no model_config by default and the model config must be passed in via command line.
"""

def configure_pipeline(self, model_config, resume_from=None):
def configure_pipeline(self, model_config, resume_from=None, **kwargs: dict[str, Any]):
# Configure the data processing component.
self.data_processing_comp = PromptProcessingConfig(
component_type=PromptProcessing,
Expand Down
4 changes: 2 additions & 2 deletions eureka_ml_insights/user_configs/mathvision.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def configure_pipeline(
# Eval Inference component round 1 (answer extraction).
self.eval_inference_comp = InferenceConfig(
component_type=Inference,
model_config=PERSONAL_GPT4O,
model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O),
data_loader_config=DataSetConfig(
MMDataLoader,
{"path": os.path.join(self.eval_data_pre_processing.output_dir, "transformed_data.jsonl"), "load_images":False},
Expand Down Expand Up @@ -131,7 +131,7 @@ def configure_pipeline(
# Eval Inference component round 2 (LLM scoring)
self.eval_inference_comp_two = InferenceConfig(
component_type=Inference,
model_config=PERSONAL_GPT4O,
model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O),
data_loader_config=DataSetConfig(
MMDataLoader,
{"path": os.path.join(self.eval_data_pre_processing_two.output_dir, "transformed_data.jsonl"), "load_images":False},
Expand Down
3 changes: 1 addition & 2 deletions eureka_ml_insights/user_configs/omni_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def configure_pipeline(self, model_config=None, resume_from=None, eval_resume_fr
data_loader_config=DataSetConfig(
DataLoader,
{
"path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl"),
"misc_columns": ["data_point_id","data_repeat_id"]
"path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl")
},
),
output_dir=os.path.join(self.log_dir, "inference_result"),
Expand Down
14 changes: 9 additions & 5 deletions eureka_ml_insights/user_configs/vision_language/spatial_grid.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import Any

from eureka_ml_insights.configs.experiment_config import ExperimentConfig
from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
Expand All @@ -11,6 +12,7 @@
ExtractAnswerGrid,
PrependStringTransform,
SequenceTransform,
MultiplyTransform,
)
from eureka_ml_insights.metrics import CaseInsensitiveMatch, CountAggregator
from eureka_ml_insights.configs import (
Expand Down Expand Up @@ -41,7 +43,7 @@ class SPATIAL_GRID_PIPELINE(ExperimentConfig):
"""This method is used to define an eval pipeline with inference and metric report components,
on the grid counting dataset."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
# Configure the data processing component.
self.data_processing_comp = PromptProcessingConfig(
component_type=PromptProcessing,
Expand All @@ -51,6 +53,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
"path": "microsoft/VISION_LANGUAGE",
"split": "val",
"tasks": "spatial_grid",
"transform": MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))),

},
),
output_dir=os.path.join(self.log_dir, "data_processing_output"),
Expand Down Expand Up @@ -113,8 +117,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
class SPATIAL_GRID_TEXTONLY_PIPELINE(SPATIAL_GRID_PIPELINE):
"""This class extends SPATIAL_GRID_PIPELINE to use text only data."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from, **kwargs)
self.data_processing_comp.data_reader_config.init_args["tasks"] = (
"spatial_grid_text_only"
)
Expand All @@ -125,7 +129,7 @@ class SPATIAL_GRID_REPORTING_PIPELINE(SPATIAL_GRID_PIPELINE):
"""This method is used to define an eval pipeline with only a metric report component,
on the grid counting dataset."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
super().configure_pipeline(model_config, resume_from, **kwargs)
self.evalreporting_comp.data_reader_config.init_args["path"] = resume_from
return PipelineConfig([self.evalreporting_comp], self.log_dir)
31 changes: 18 additions & 13 deletions eureka_ml_insights/user_configs/vision_language/spatial_map.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from typing import Any

from eureka_ml_insights.configs.experiment_config import ExperimentConfig
from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing, DataProcessing, DataJoin
Expand All @@ -20,6 +21,7 @@
MultiplyTransform,
SequenceTransform,
RegexTransform,
SamplerTransform
)
from eureka_ml_insights.metrics import SubstringExistsMatch, BiLevelAggregator, BiLevelCountAggregator, CountAggregator

Expand Down Expand Up @@ -54,7 +56,7 @@ class SPATIAL_MAP_PIPELINE(ExperimentConfig):
"""This method is used to define an eval pipeline with inference and metric report components,
on the spatial map dataset."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
# Configure the data processing component.
self.data_processing_comp = PromptProcessingConfig(
component_type=PromptProcessing,
Expand All @@ -64,7 +66,10 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
"path": "microsoft/VISION_LANGUAGE",
"split": "val_noinstruction",
"tasks": "spatial_map",
"transform": MultiplyTransform(n_repeats=5),
"transform": SequenceTransform([
# SamplerTransform(sample_count=10, random_seed=1),
MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))),
]),
},
),
prompt_template_path=os.path.join(
Expand All @@ -86,7 +91,7 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
),
output_dir=os.path.join(self.log_dir, "inference_result"),
resume_from=resume_from,
max_concurrent=10,
max_concurrent=int(kwargs.get("max_concurrent", 10)),
)

self.preeval_data_post_processing_comp = DataProcessingConfig(
Expand Down Expand Up @@ -141,13 +146,13 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)

self.inference_llm_answer_extract = InferenceConfig(
component_type=Inference,
model_config=OAI_GPT4O_2024_11_20_CONFIG,
model_config=kwargs.get("eval_model_config", OAI_GPT4O_2024_11_20_CONFIG),
data_loader_config=DataSetConfig(
DataLoader,
{"path": os.path.join(self.filter_empty_answer.output_dir, "transformed_data.jsonl")},
),
output_dir=os.path.join(self.log_dir, "llm_answer_extract_inference_result"),
max_concurrent=1
max_concurrent=10
)

self.data_join = DataJoinConfig(
Expand Down Expand Up @@ -443,8 +448,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
class SPATIAL_MAP_COT_PIPELINE(SPATIAL_MAP_PIPELINE):
"""This class extends SPATIAL_MAP_PIPELINE to use a COT prompt."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from, **kwargs)
self.data_processing_comp.prompt_template_path=os.path.join(
os.path.dirname(__file__),
"../../prompt_templates/vision_language_templates/cot.jinja",
Expand All @@ -454,8 +459,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
class SPATIAL_MAP_TEXTONLY_PIPELINE(SPATIAL_MAP_PIPELINE):
"""This class extends SPATIAL_MAP_PIPELINE to use text only data."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from, **kwargs)
self.data_processing_comp.data_reader_config.init_args["tasks"] = (
"spatial_map_text_only"
)
Expand All @@ -464,8 +469,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None)
class SPATIAL_MAP_COT_TEXTONLY_PIPELINE(SPATIAL_MAP_COT_PIPELINE):
"""This class extends SPATIAL_MAP_COT_PIPELINE to use text only data."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
config = super().configure_pipeline(model_config, resume_from, **kwargs)
self.data_processing_comp.data_reader_config.init_args["tasks"] = (
"spatial_map_text_only"
)
Expand All @@ -476,8 +481,8 @@ class SPATIAL_MAP_REPORTING_PIPELINE(SPATIAL_MAP_PIPELINE):
"""This method is used to define an eval pipeline with only a metric report component,
on the spatial map dataset."""

def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig:
super().configure_pipeline(model_config, resume_from)
def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig:
super().configure_pipeline(model_config, resume_from, **kwargs)
self.preeval_data_post_processing_comp.data_reader_config.init_args["path"] = resume_from
# Configure the pipeline
return PipelineConfig(
Expand Down
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
'datasets>=3.2.0',
'fuzzywuzzy>=0.18.0',
'jsonlines>=2.0.0',
'pandas>=2.2.1',
'pillow>=10.0.1',
'torch>=2.6.0',
'numpy==1.26.4',
'numpy>=2.2',
'pandas>=2.2.1',
'tqdm>=4.65.0',
'jinja2>=3.1.3',
'transformers>=4.51.3',
Expand All @@ -38,10 +38,10 @@
'google-generativeai>=0.7.0',
'openai>=1.35.5',
'bitsandbytes>=0.42.0',
'pycocotools>=2.0.10',
'vllm==0.8.5',
'accelerate>=0.21.0',
'pycocotools>=2.0.8',
'vllm>=0.8.5',
'latex2sympy2>=1.9.1',
# 'latex2sympy2_extended[antlr4_13_2]', # optional for mathvision
],
extras_require={
'llamacpp': [
Expand Down
Loading