diff --git a/environment.yml b/environment.yml index dbb1a604..5b20b6a7 100644 --- a/environment.yml +++ b/environment.yml @@ -37,7 +37,6 @@ dependencies: - xz=5.6.4=h5eee18b_1 - zlib=1.2.13=h5eee18b_1 - pip: - - accelerate==1.6.0 - anthropic==0.49.0 - azure-ai-textanalytics>=5.3.0 - azure-core>=1.29.5 @@ -47,10 +46,10 @@ dependencies: - datasets>=3.2.0 - fuzzywuzzy>=0.18.0 - jsonlines>=2.0.0 - - pandas>=2.2.1 - pillow>=10.0.1 - torch>=2.6.0 - - numpy==1.26.4 + - numpy>=2.2 + - pandas>=2.2.1 - tqdm>=4.65.0 - jinja2>=3.1.3 - transformers>=4.51.3 @@ -63,8 +62,8 @@ dependencies: - google-generativeai>=0.7.0 - openai>=1.35.5 - bitsandbytes>=0.42.0 + - pycocotools>=2.0.10 + - vllm==0.8.5 - accelerate>=0.21.0 - - pycocotools>=2.0.8 - - vllm>=0.8.5 - - latex2sympy2>=1.9.1 + # - latex2sympy2_extended[antlr4_13_2] # optional for mathvision prefix: /home/sayouse/miniconda3/envs/myenv \ No newline at end of file diff --git a/eureka_ml_insights/data_utils/mathvision_utils.py b/eureka_ml_insights/data_utils/mathvision_utils.py index 48e5df9f..b7967b85 100644 --- a/eureka_ml_insights/data_utils/mathvision_utils.py +++ b/eureka_ml_insights/data_utils/mathvision_utils.py @@ -1,7 +1,6 @@ """Evaluates output of models for Math-V dataset; following https://github.com/mathllm/MATH-V/tree/main/evaluation""" from dataclasses import dataclass -from latex2sympy2 import latex2sympy import pandas as pd import re @@ -47,6 +46,8 @@ def eval_tuple(s): Note: This function relies on the latex2sympy function which is assumed to be defined elsewhere in the code. """ + from latex2sympy2_extended import latex2sympy + # Split the string by commas to get individual elements sl = s[1:-1].split(',') @@ -89,7 +90,8 @@ def is_equal(asw: str, gt_asw: str) -> bool: bool: True if the answers are equivalent, otherwise False. """ - + from latex2sympy2_extended import latex2sympy + # return gt_asw == asw # Check for empty strings after removing spaces and return False if any of them is empty. diff --git a/eureka_ml_insights/user_configs/gpqa.py b/eureka_ml_insights/user_configs/gpqa.py index 949240f4..f12ef6e8 100644 --- a/eureka_ml_insights/user_configs/gpqa.py +++ b/eureka_ml_insights/user_configs/gpqa.py @@ -1,3 +1,4 @@ +import logging import os """This file contains user defined configuration classes for the GPQA dataset.""" @@ -474,7 +475,10 @@ def configure_pipeline( ) -> PipelineConfig: pipeline = super().configure_pipeline(model_config=model_config, resume_from=resume_from, **kwargs) # data preprocessing - self.data_processing_comp.data_reader_config.init_args["transform"].transforms.append( - MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 5))) - ) + if "n_repeats" in kwargs and int(kwargs["n_repeats"]) != 5: + logging.warning( + f"n_repeats is set to {kwargs['n_repeats']} in kwargs, but will be overridden to 5 for GPQA_PIPELINE_5Run." + ) + self.data_processing_comp.data_reader_config.init_args["transform"].transforms[-1] = MultiplyTransform(n_repeats=5) + return pipeline diff --git a/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py b/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py index c2cbfa04..e439b8f7 100644 --- a/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py +++ b/eureka_ml_insights/user_configs/image_understanding/spatial_reasoning.py @@ -1,4 +1,5 @@ import os +from typing import Any from eureka_ml_insights.configs.experiment_config import ExperimentConfig from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing @@ -11,6 +12,7 @@ DataReader, PrependStringTransform, SequenceTransform, + SamplerTransform ) from eureka_ml_insights.data_utils.spatial_utils import ( LowerCaseNoPunctuationConvertNumbers, @@ -50,7 +52,7 @@ class SPATIAL_REASONING_PAIRS_PIPELINE(ExperimentConfig): There is no model_config by default and the model config must be passed in via command lime. """ - def configure_pipeline(self, model_config, resume_from=None): + def configure_pipeline(self, model_config, resume_from=None, **kwargs: dict[str, Any]): # Configure the data processing component. self.data_processing_comp = PromptProcessingConfig( component_type=PromptProcessing, diff --git a/eureka_ml_insights/user_configs/mathvision.py b/eureka_ml_insights/user_configs/mathvision.py index 83cdf0ca..10c0ba67 100644 --- a/eureka_ml_insights/user_configs/mathvision.py +++ b/eureka_ml_insights/user_configs/mathvision.py @@ -103,7 +103,7 @@ def configure_pipeline( # Eval Inference component round 1 (answer extraction). self.eval_inference_comp = InferenceConfig( component_type=Inference, - model_config=PERSONAL_GPT4O, + model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O), data_loader_config=DataSetConfig( MMDataLoader, {"path": os.path.join(self.eval_data_pre_processing.output_dir, "transformed_data.jsonl"), "load_images":False}, @@ -131,7 +131,7 @@ def configure_pipeline( # Eval Inference component round 2 (LLM scoring) self.eval_inference_comp_two = InferenceConfig( component_type=Inference, - model_config=PERSONAL_GPT4O, + model_config=kwargs.get("eval_model_config", PERSONAL_GPT4O), data_loader_config=DataSetConfig( MMDataLoader, {"path": os.path.join(self.eval_data_pre_processing_two.output_dir, "transformed_data.jsonl"), "load_images":False}, diff --git a/eureka_ml_insights/user_configs/omni_math.py b/eureka_ml_insights/user_configs/omni_math.py index 6dd64e97..a7d554df 100644 --- a/eureka_ml_insights/user_configs/omni_math.py +++ b/eureka_ml_insights/user_configs/omni_math.py @@ -63,8 +63,7 @@ def configure_pipeline(self, model_config=None, resume_from=None, eval_resume_fr data_loader_config=DataSetConfig( DataLoader, { - "path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl"), - "misc_columns": ["data_point_id","data_repeat_id"] + "path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl") }, ), output_dir=os.path.join(self.log_dir, "inference_result"), diff --git a/eureka_ml_insights/user_configs/vision_language/spatial_grid.py b/eureka_ml_insights/user_configs/vision_language/spatial_grid.py index c45d6d7e..278bd19e 100644 --- a/eureka_ml_insights/user_configs/vision_language/spatial_grid.py +++ b/eureka_ml_insights/user_configs/vision_language/spatial_grid.py @@ -1,4 +1,5 @@ import os +from typing import Any from eureka_ml_insights.configs.experiment_config import ExperimentConfig from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing @@ -11,6 +12,7 @@ ExtractAnswerGrid, PrependStringTransform, SequenceTransform, + MultiplyTransform, ) from eureka_ml_insights.metrics import CaseInsensitiveMatch, CountAggregator from eureka_ml_insights.configs import ( @@ -41,7 +43,7 @@ class SPATIAL_GRID_PIPELINE(ExperimentConfig): """This method is used to define an eval pipeline with inference and metric report components, on the grid counting dataset.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: # Configure the data processing component. self.data_processing_comp = PromptProcessingConfig( component_type=PromptProcessing, @@ -51,6 +53,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) "path": "microsoft/VISION_LANGUAGE", "split": "val", "tasks": "spatial_grid", + "transform": MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))), + }, ), output_dir=os.path.join(self.log_dir, "data_processing_output"), @@ -113,8 +117,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) class SPATIAL_GRID_TEXTONLY_PIPELINE(SPATIAL_GRID_PIPELINE): """This class extends SPATIAL_GRID_PIPELINE to use text only data.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - config = super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + config = super().configure_pipeline(model_config, resume_from, **kwargs) self.data_processing_comp.data_reader_config.init_args["tasks"] = ( "spatial_grid_text_only" ) @@ -125,7 +129,7 @@ class SPATIAL_GRID_REPORTING_PIPELINE(SPATIAL_GRID_PIPELINE): """This method is used to define an eval pipeline with only a metric report component, on the grid counting dataset.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + super().configure_pipeline(model_config, resume_from, **kwargs) self.evalreporting_comp.data_reader_config.init_args["path"] = resume_from return PipelineConfig([self.evalreporting_comp], self.log_dir) diff --git a/eureka_ml_insights/user_configs/vision_language/spatial_map.py b/eureka_ml_insights/user_configs/vision_language/spatial_map.py index e2d3364e..1b075840 100644 --- a/eureka_ml_insights/user_configs/vision_language/spatial_map.py +++ b/eureka_ml_insights/user_configs/vision_language/spatial_map.py @@ -1,4 +1,5 @@ import os +from typing import Any from eureka_ml_insights.configs.experiment_config import ExperimentConfig from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing, DataProcessing, DataJoin @@ -20,6 +21,7 @@ MultiplyTransform, SequenceTransform, RegexTransform, + SamplerTransform ) from eureka_ml_insights.metrics import SubstringExistsMatch, BiLevelAggregator, BiLevelCountAggregator, CountAggregator @@ -54,7 +56,7 @@ class SPATIAL_MAP_PIPELINE(ExperimentConfig): """This method is used to define an eval pipeline with inference and metric report components, on the spatial map dataset.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: # Configure the data processing component. self.data_processing_comp = PromptProcessingConfig( component_type=PromptProcessing, @@ -64,7 +66,10 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) "path": "microsoft/VISION_LANGUAGE", "split": "val_noinstruction", "tasks": "spatial_map", - "transform": MultiplyTransform(n_repeats=5), + "transform": SequenceTransform([ + # SamplerTransform(sample_count=10, random_seed=1), + MultiplyTransform(n_repeats=int(kwargs.get("n_repeats", 1))), + ]), }, ), prompt_template_path=os.path.join( @@ -86,7 +91,7 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) ), output_dir=os.path.join(self.log_dir, "inference_result"), resume_from=resume_from, - max_concurrent=10, + max_concurrent=int(kwargs.get("max_concurrent", 10)), ) self.preeval_data_post_processing_comp = DataProcessingConfig( @@ -141,13 +146,13 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) self.inference_llm_answer_extract = InferenceConfig( component_type=Inference, - model_config=OAI_GPT4O_2024_11_20_CONFIG, + model_config=kwargs.get("eval_model_config", OAI_GPT4O_2024_11_20_CONFIG), data_loader_config=DataSetConfig( DataLoader, {"path": os.path.join(self.filter_empty_answer.output_dir, "transformed_data.jsonl")}, ), output_dir=os.path.join(self.log_dir, "llm_answer_extract_inference_result"), - max_concurrent=1 + max_concurrent=10 ) self.data_join = DataJoinConfig( @@ -443,8 +448,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) class SPATIAL_MAP_COT_PIPELINE(SPATIAL_MAP_PIPELINE): """This class extends SPATIAL_MAP_PIPELINE to use a COT prompt.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - config = super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + config = super().configure_pipeline(model_config, resume_from, **kwargs) self.data_processing_comp.prompt_template_path=os.path.join( os.path.dirname(__file__), "../../prompt_templates/vision_language_templates/cot.jinja", @@ -454,8 +459,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) class SPATIAL_MAP_TEXTONLY_PIPELINE(SPATIAL_MAP_PIPELINE): """This class extends SPATIAL_MAP_PIPELINE to use text only data.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - config = super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + config = super().configure_pipeline(model_config, resume_from, **kwargs) self.data_processing_comp.data_reader_config.init_args["tasks"] = ( "spatial_map_text_only" ) @@ -464,8 +469,8 @@ def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) class SPATIAL_MAP_COT_TEXTONLY_PIPELINE(SPATIAL_MAP_COT_PIPELINE): """This class extends SPATIAL_MAP_PIPELINE to use text only data.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - config = super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + config = super().configure_pipeline(model_config, resume_from, **kwargs) self.data_processing_comp.data_reader_config.init_args["tasks"] = ( "spatial_map_text_only" ) @@ -476,8 +481,8 @@ class SPATIAL_MAP_REPORTING_PIPELINE(SPATIAL_MAP_PIPELINE): """This method is used to define an eval pipeline with only a metric report component, on the spatial map dataset.""" - def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None) -> PipelineConfig: - super().configure_pipeline(model_config, resume_from) + def configure_pipeline(self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]) -> PipelineConfig: + super().configure_pipeline(model_config, resume_from, **kwargs) self.preeval_data_post_processing_comp.data_reader_config.init_args["path"] = resume_from # Configure the pipeline return PipelineConfig( diff --git a/setup.py b/setup.py index ebc9ac5f..f7f7f816 100644 --- a/setup.py +++ b/setup.py @@ -22,10 +22,10 @@ 'datasets>=3.2.0', 'fuzzywuzzy>=0.18.0', 'jsonlines>=2.0.0', - 'pandas>=2.2.1', 'pillow>=10.0.1', 'torch>=2.6.0', - 'numpy==1.26.4', + 'numpy>=2.2', + 'pandas>=2.2.1', 'tqdm>=4.65.0', 'jinja2>=3.1.3', 'transformers>=4.51.3', @@ -38,10 +38,10 @@ 'google-generativeai>=0.7.0', 'openai>=1.35.5', 'bitsandbytes>=0.42.0', + 'pycocotools>=2.0.10', + 'vllm==0.8.5', 'accelerate>=0.21.0', - 'pycocotools>=2.0.8', - 'vllm>=0.8.5', - 'latex2sympy2>=1.9.1', + # 'latex2sympy2_extended[antlr4_13_2]', # optional for mathvision ], extras_require={ 'llamacpp': [