ARC-AGI-v1 Benchmark Pipeline #169
Changes from all commits: caa4570, c6e6efd, a8f0e4b, 4c7aa34, 6e49d40, c347ff3, 97301d3, 4198c42
New file (@@ -0,0 +1,69 @@):

```python
import re
from dataclasses import dataclass

import pandas as pd

from .transform import DFTransformBase


@dataclass
class ARCAGI_ExtractAnswer(DFTransformBase):
    model_output_column: str
    model_answer_column: str

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
        return df

    @staticmethod
    def parse_output_answer(response):
        """
        Parse the input string to extract the answer of a given ARC-AGI question.
        Parameters:
            response (str): Input string containing the answer in the form "<output>final answer string</output>".
        Returns:
            answer (str): The final answer string with leading and training spaces stripped.
        """
        if response is None:
            return ""
        elif response.find("<output>") == -1 or response.find("</output>") == -1:
            return ""

        start_index = response.find("<output>") + len("<output>")
        end_index = response.find("</output>")

        answer = response[start_index:end_index].strip()

        return answer


@dataclass
class ARCAGI_CleanCOTAnswer(DFTransformBase):
    model_output_column: str
    model_answer_column: str

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        df[self.model_answer_column] = df[self.model_output_column].apply(self.parse_output_answer)
        return df

    @staticmethod
    def parse_output_answer(response):
        """
        Strip the chain-of-thought block and replace None responses with an empty string.
        Parameters:
            response (str): Possibly None response string.
        Returns:
            answer (str): Response text following the "</think>" tag, the full response
                if no tag is present, or "" if the response is None.
        """
        if response is None:
            return ""

        tag_index = response.find("</think>")
        if tag_index == -1:
            return response

        return response[tag_index + len("</think>"):]
```

Review comment (on the `parse_output_answer` docstring): training -> trailing

Review comment (on `ARCAGI_CleanCOTAnswer`): Can we put this in the general transforms so others can use the same if they need to for other benchmarks? Cleaning COTs is not necessarily ARC-AGI specific. Also, here is another way we did it for other benchmarks, if useful.
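The tag-based extraction can be exercised on its own. The sketch below inlines the same parsing logic as `ARCAGI_ExtractAnswer.parse_output_answer` (the real class depends on the repo's `DFTransformBase`, so the class wrapper is omitted here; the column names are illustrative):

```python
import pandas as pd

def parse_output_answer(response):
    # Return "" for missing responses or missing <output>...</output> tags,
    # otherwise the stripped text between the first pair of tags.
    if response is None:
        return ""
    if response.find("<output>") == -1 or response.find("</output>") == -1:
        return ""
    start = response.find("<output>") + len("<output>")
    end = response.find("</output>")
    return response[start:end].strip()

df = pd.DataFrame({"model_output": [
    "Reasoning... <output> 1 2\n3 4 </output>",
    "no tags at all",
    None,
]})
df["model_answer"] = df["model_output"].apply(parse_output_answer)
print(df["model_answer"].tolist())  # → ['1 2\n3 4', '', '']
```

Note that malformed or unclosed tags simply yield the empty string, which is counted as an incorrect answer downstream.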
New file (@@ -0,0 +1,3 @@):

```
You are an intelligent assistant who is very good at answering test questions accurately.

{{ prompt }}
```
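The `{{ prompt }}` placeholder suggests Jinja-style substitution, though the pipeline's actual templating engine is not shown in this diff. A minimal stand-in using plain string replacement illustrates how the question text would be spliced into the template:

```python
# Minimal stand-in for the pipeline's templating step; the real engine is
# likely Jinja2, and plain string replacement is used here only for illustration.
TEMPLATE_TEXT = (
    "You are an intelligent assistant who is very good at answering "
    "test questions accurately.\n\n{{ prompt }}"
)

def render(template: str, prompt: str) -> str:
    # Substitute the question text for the placeholder.
    return template.replace("{{ prompt }}", prompt)

print(render(TEMPLATE_TEXT, "What is 2 + 2?"))
```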
New file (@@ -0,0 +1,21 @@):

```
You are an intelligent assistant who is very good at answering test questions accurately.
In the examples that follow you will be shown grids of numbers.
The numbers in the grids range from 0 through 9.
Each grid can be rendered as a grid of squares.
Each square in the grid is rendered as a colored square where the color of the square is derived from the number.
The colors are decided as follows:

0 - black
1 - blue
2 - red
3 - green
4 - yellow
5 - grey
6 - magenta
7 - brown
8 - cyan
9 - maroon

With that in mind, do your best to solve the question below.

{{ prompt }}
```

Review comment (on the prompt): So the prompt itself does not ask the model to format the answer in `<output>` tags; however, the answer extraction assumes this format. Is this because reasoning models are expected to produce it? What if the model uses other tags, or no tags at all (e.g. if it is a conventional model with no thinking block)? For other reasoning tasks, we ask the model to clearly mark the final answer according to some format, for example "Final Answer: ...", and then extract what comes after that.

Review comment: let's replace this with a placeholder url
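The reviewer's suggestion of a more forgiving extractor could look roughly like the sketch below: prefer the `<output>` tags the current code expects, then fall back to a `Final Answer:` marker. Both the function name and the fallback marker are assumptions for illustration, not part of the PR:

```python
import re

def extract_final_answer(response):
    # Hypothetical extractor per the review suggestion: try <output>...</output>
    # first, fall back to text after the last "Final Answer:" marker, and
    # return "" when neither convention is present.
    if response is None:
        return ""
    match = re.search(r"<output>(.*?)</output>", response, flags=re.DOTALL)
    if match:
        return match.group(1).strip()
    marker = "Final Answer:"
    idx = response.rfind(marker)
    if idx != -1:
        return response[idx + len(marker):].strip()
    return ""

print(extract_final_answer("thoughts...\nFinal Answer: 3 1\n4 1"))
```

Using `rfind` takes the last occurrence of the marker, which helps when a chain-of-thought model restates "Final Answer:" while reasoning before committing to one at the end.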