Skip to content

Commit adc9f9f

Browse files
committed
add o3-deep-research and o4-mini-deep-research
1 parent 4c9f6e2 commit adc9f9f

File tree

11 files changed

+370
-54
lines changed

11 files changed

+370
-54
lines changed

configs/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,13 @@
6161
analyzer_model_id = "o3",
6262
predict_model_id = "veo3-predict",
6363
fetch_model_id = "veo3-fetch",
64+
)
65+
66+
# Tool config: reads attached task files so their text can be fed to the agent.
file_reader_tool_config = dict(type="file_reader_tool")
69+
70+
# Tool config: OpenAI Deep Research, backed by the o3-deep-research model.
oai_deep_research_tool_config = dict(
    type="oai_deep_research_tool",
    model_id="o3-deep-research",
)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
_base_ = './base.py'

# ---- General run settings ----
tag = "oai_deep_research-o3"   # experiment tag used to label this run
concurrency = 4                # how many tasks run concurrently per batch
workdir = "workdir"
log_path = "log.txt"
save_path = "dra.jsonl"        # answers are appended here as JSON lines
use_local_proxy = True  # True for local proxy, False for public proxy

use_hierarchical_agent = False

# ---- Dataset ----
dataset = dict(
    type="gaia_dataset",
    name="2023_all",
    path="data/GAIA",
    split="test",
)

# ---- Tool ----
oai_deep_research_tool_config = dict(
    type="oai_deep_research_tool",
    model_id="o3-deep-research",
)

# ---- Agent ----
oai_deep_research_agent_config = dict(
    type="general_agent",
    name="oai_deep_research_agent",
    model_id="gpt-4.1",
    description="A general agent that can perform deep research using openai's deep research capabilities.",
    max_steps=20,
    template_path="src/agent/general_agent/prompts/general_agent.yaml",
    provide_run_summary=True,
    tools=["oai_deep_research_tool", "deep_analyzer_tool"],
    mcp_tools=[],
)

# The runner reads this top-level name to pick the agent to build.
agent_config = oai_deep_research_agent_config

examples/run_oai_deep_research.py

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
import warnings
2+
warnings.simplefilter("ignore", DeprecationWarning)
3+
4+
import os
5+
import sys
6+
from pathlib import Path
7+
import pandas as pd
8+
from typing import List
9+
import json
10+
from datetime import datetime
11+
import asyncio
12+
import threading
13+
import argparse
14+
from mmengine import DictAction
15+
16+
root = str(Path(__file__).resolve().parents[1])
17+
sys.path.append(root)
18+
19+
from src.logger import logger
20+
from src.config import config
21+
from src.models import model_manager
22+
from src.metric import question_scorer
23+
from src.agent import create_agent, prepare_response
24+
from src.registry import DATASET
25+
from src.tools import FileReaderTool
26+
27+
# Serializes concurrent appends so JSONL lines from different tasks never interleave.
append_answer_lock = threading.Lock()

def append_answer(entry: dict, jsonl_file: str) -> None:
    """Append one result record as a JSON line to *jsonl_file*.

    Creates the parent directory if needed. The whole write is guarded by a
    module-level lock because multiple tasks may report concurrently.

    Args:
        entry: The result record to serialize (must be JSON-serializable).
        jsonl_file: Path to the JSONL answers file.
    """
    path = Path(jsonl_file)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Hold the lock for the open+write so each record lands as one atomic line.
    with append_answer_lock, open(path, "a", encoding="utf-8") as fp:
        fp.write(json.dumps(entry) + "\n")
    # NOTE: the previous `assert os.path.exists(...)` was removed — it was
    # redundant (append-mode open would have raised) and asserts are stripped
    # under `python -O`.
    print("Answer exported to file:", path.resolve())
36+
37+
def filter_answers(answers_file):
    """Rewrite *answers_file* in place, keeping only usable previous answers.

    A row survives when its prediction is present (not the agent's explicit
    "Unable to determine" give-up marker) and — for validation rows carrying a
    true answer — when the prediction actually scores as correct. Test rows
    (true_answer == "?") are kept on any non-empty prediction, since they
    cannot be scored.

    Args:
        answers_file: Path to a JSONL file with 'prediction' and 'true_answer'
            columns.
    """
    answer_df = pd.read_json(answers_file, lines=True)

    kept_rows = []
    for _, row in answer_df.iterrows():
        prediction = row['prediction']
        truth = row['true_answer']

        # "Unable to determine" is the agent's explicit give-up marker.
        if str(prediction) == "Unable to determine":
            prediction = None
        if prediction is None:
            continue

        if truth == "?":
            # Test split: no ground truth available, keep any real prediction.
            kept_rows.append(row)
        elif question_scorer(str(prediction), truth):
            # Validation split: only keep answers that score as correct.
            kept_rows.append(row)

    filtered_df = pd.DataFrame(kept_rows)
    filtered_df.to_json(answers_file, lines=True, orient='records')

    logger.info(f"Previous answers filtered! {len(answer_df)} -> {len(filtered_df)}")
67+
68+
def get_tasks_to_run(answers_file, dataset) -> List[dict]:
    """Return dataset records that do not yet have a usable answer on disk.

    Previously saved answers in *answers_file* are first filtered (bad or
    incorrect answers are dropped), then any task whose ``task_id`` already
    appears in the file is skipped. Any error while loading the records falls
    back to running everything from scratch.

    Args:
        answers_file: Path to the JSONL answers file (may not exist yet).
        dataset: Dataset object exposing a pandas DataFrame via ``.data``.

    Returns:
        List of record dicts still to be answered.
    """
    data = dataset.data

    logger.info(f"Loading answers from {answers_file}...")
    done_questions = []
    try:
        if os.path.exists(answers_file):
            logger.info("Filtering answers starting.")
            filter_answers(answers_file)
            logger.info("Filtering answers ending.")

            df = pd.read_json(answers_file, lines=True)
            if "task_id" not in df.columns:
                logger.warning(f"Answers file {answers_file} does not contain 'task_id' column. Please check the file format.")
                return []
            done_questions = df["task_id"].tolist()
            logger.info(f"Found {len(done_questions)} previous results!")
    except Exception as e:
        # BUG FIX: the original passed `e` as a stray positional argument with
        # no format placeholder, so the error was never rendered in the log.
        logger.warning(f"Error when loading records: {e}")
        logger.warning("No usable records! ▶️ Starting new.")
        done_questions = []
    # Set gives O(1) membership checks while scanning the whole dataset.
    done = set(done_questions)
    return [line for line in data.to_dict(orient="records") if line["task_id"] not in done]
92+
93+
async def answer_single_question(config, example):
    """Run the configured agent on one GAIA example and append the result.

    Any exception is caught and recorded in the output row rather than
    propagated, so one failing task does not abort the whole batch.

    Args:
        config: Global run config (provides ``agent_config`` and ``save_path``).
        example: One dataset record with 'question', 'task_id', 'task',
            'true_answer' and optional 'file_name' keys.
    """
    # BUG FIX: initialize these before the try-block so the except/record
    # paths below can reference them even when the failure happens very early
    # (e.g. in create_agent) — the original code raised NameError there.
    augmented_question = example["question"]
    start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    try:
        agent = await create_agent(config)
        logger.visualize_agent_tree(agent)

        logger.info(f"Task Id: {example['task_id']}, Final Answer: {example['true_answer']}")

        file_reader_tool = FileReaderTool(text_limit=50000)

        # If the example ships an attachment, inline its text into the prompt.
        if example["file_name"]:
            prompt_use_files = "\n\nTo solve the task above, you will have to use these attached files:\n"
            file_description = f" - Attached file: {example['file_name']}"
            file_text = await file_reader_tool.forward(file_path=example["file_name"])
            if file_text.error:
                logger.warning(f"Error reading file {example['file_name']}: {file_text.error}")
                file_text = "Unable to read the file."
            else:
                file_text = file_text.output
            file_description += f"\n{file_text}"
            prompt_use_files += file_description
            augmented_question += prompt_use_files

        start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Run agent 🚀
        final_result = await agent.run(task=augmented_question)

        agent_memory = await agent.write_memory_to_messages(summary_mode=True)

        # Reformulate the raw agent output into a final answer.
        final_result = await prepare_response(augmented_question,
                                              agent_memory,
                                              reformulation_model=model_manager.registed_models["gpt-4.1"])

        output = str(final_result)
        # Drop the (large) model input messages before serializing the steps.
        for memory_step in agent.memory.steps:
            memory_step.model_input_messages = None
        intermediate_steps = [str(step) for step in agent.memory.steps]

        # Check for parsing errors which indicate the LLM failed to follow the required format
        parsing_error = any("AgentParsingError" in step for step in intermediate_steps)

        # check if iteration limit exceeded
        iteration_limit_exceeded = "Agent stopped due to iteration limit or time limit." in output
        raised_exception = False
        exception = None
    except Exception as e:
        # BUG FIX: the original logged with stray positional args (no format
        # placeholders), so the question/error were never rendered.
        logger.info(f"Error on {augmented_question}: {e}")
        output = None
        intermediate_steps = []
        parsing_error = False
        iteration_limit_exceeded = False
        exception = e
        raised_exception = True
    end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    annotated_example = {
        "agent_name": config.agent_config.name,
        "question": example["question"],
        "augmented_question": augmented_question,
        "prediction": output,
        "intermediate_steps": intermediate_steps,
        "parsing_error": parsing_error,
        "iteration_limit_exceeded": iteration_limit_exceeded,
        "agent_error": str(exception) if raised_exception else None,
        "start_time": start_time,
        "end_time": end_time,
        "task": example["task"],
        "task_id": example["task_id"],
        "true_answer": example["true_answer"],
    }
    append_answer(annotated_example, config.save_path)
165+
166+
def parse_args():
    """Build and parse the command-line arguments for this runner."""
    parser = argparse.ArgumentParser(description='main')
    default_config = os.path.join(root, "configs", "config_oai_deep_research.py")
    parser.add_argument("--config", default=default_config, help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
             'in xxx=yyy format will be merged into config file. If the value to '
             'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
             'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
             'Note that the quotation marks are necessary and that no white space '
             'is allowed.')
    return parser.parse_args()
182+
183+
async def main():
    """Entry point: load config, models and dataset, then answer tasks in batches."""
    # Parse command line arguments
    args = parse_args()

    # Initialize the configuration
    config.init_config(args.config, args)

    # Initialize the logger
    logger.init_logger(log_path=config.log_path)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Register models. BUG FIX: honor the config's `use_local_proxy` flag
    # instead of the hard-coded True the original passed.
    model_manager.init_models(use_local_proxy=getattr(config, "use_local_proxy", True))
    logger.info("| Registed models: %s", ", ".join(model_manager.registed_models.keys()))

    # Load dataset
    dataset = DATASET.build(config.dataset)
    logger.info(f"| Loaded dataset: {len(dataset)} examples.")

    # Load previously answered tasks and keep only the remaining ones.
    # BUG FIX: removed committed debug scaffolding — the original dropped the
    # last task, ran one hard-coded task_id and then called exit(), which made
    # the batch loop below unreachable.
    tasks_to_run = get_tasks_to_run(config.save_path, dataset)
    logger.info(f"| Loaded {len(tasks_to_run)} tasks to run.")

    # Run tasks in fixed-size concurrent batches.
    batch_size = getattr(config, "concurrency", 4)
    for i in range(0, len(tasks_to_run), batch_size):
        # Slicing clamps automatically; no min() needed.
        batch = tasks_to_run[i:i + batch_size]
        await asyncio.gather(*[answer_single_question(config, task) for task in batch])
        logger.info(f"| Batch {i // batch_size + 1} done.")

if __name__ == '__main__':
    asyncio.run(main())

src/mcp/local/mcp_tools_registry.json

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -456,32 +456,6 @@
456456
"usage_count": 2,
457457
"last_used": "2025-08-01T17:01:50.743059"
458458
},
459-
{
460-
"name": "extract_colored_numbers_from_image",
461-
"description": "Extracts numbers from an image and categorizes them into two lists based on their color: red or green.",
462-
"function": null,
463-
"metadata": {
464-
"name": "extract_colored_numbers_from_image",
465-
"description": "Extracts numbers from an image and categorizes them into two lists based on their color: red or green.",
466-
"requires": "cv2, pytesseract, numpy, typing",
467-
"args": [
468-
"image_path (str): Path to the input image file.",
469-
"red_color_lower_bound (tuple): Lower bound for red color in HSV format (H, S, V).",
470-
"red_color_upper_bound (tuple): Upper bound for red color in HSV format (H, S, V).",
471-
"green_color_lower_bound (tuple): Lower bound for green color in HSV format (H, S, V).",
472-
"green_color_upper_bound (tuple): Upper bound for green color in HSV format (H, S, V).",
473-
"ocr_config (str): Configuration string for Tesseract OCR.",
474-
"min_contour_area (int): The minimum area (in pixels) for a contour to be considered a number."
475-
],
476-
"returns": [
477-
"result (dict): A dictionary with 'red_numbers' and 'green_numbers' as keys, containing their respective lists of extracted integers."
478-
]
479-
},
480-
"script_content": "```python\n# MCP Name: extract_colored_numbers_from_image\n# Description: Extracts numbers from an image and categorizes them into two lists based on their color: red or green.\n# Arguments:\n# image_path (str): Path to the input image file.\n# red_color_lower_bound (tuple): Lower bound for red color in HSV format (H, S, V).\n# red_color_upper_bound (tuple): Upper bound for red color in HSV format (H, S, V).\n# green_color_lower_bound (tuple): Lower bound for green color in HSV format (H, S, V).\n# green_color_upper_bound (tuple): Upper bound for green color in HSV format (H, S, V).\n# ocr_config (str): Configuration string for Tesseract OCR.\n# min_contour_area (int): The minimum area (in pixels) for a contour to be considered a number.\n# Returns:\n# result (dict): A dictionary with 'red_numbers' and 'green_numbers' as keys, containing their respective lists of extracted integers.\n# Requires: cv2, pytesseract, numpy, typing\n\nimport cv2\nimport pytesseract\nimport numpy as np\nfrom typing import Tuple, List, Dict\n\ndef extract_colored_numbers_from_image(\n image_path: str,\n red_color_lower_bound: Tuple[int, int, int],\n red_color_upper_bound: Tuple[int, int, int],\n green_color_lower_bound: Tuple[int, int, int],\n green_color_upper_bound: Tuple[int, int, int],\n ocr_config: str,\n min_contour_area: int\n) -> Dict[str, List[int]]:\n \"\"\"\n Extracts numbers from an image and categorizes them into two lists based on their color: red or green.\n\n This function reads an image, converts it to the HSV color space, and then creates binary masks\n for the specified red and green color ranges. 
It finds contours in these masks, filters them by area,\n and then performs Optical Character Recognition (OCR) on each valid contour to extract the numbers.\n\n Args:\n image_path (str): Path to the input image file.\n red_color_lower_bound (Tuple[int, int, int]): Lower bound for red color in HSV format (H, S, V).\n red_color_upper_bound (Tuple[int, int, int]): Upper bound for red color in HSV format (H, S, V).\n green_color_lower_bound (Tuple[int, int, int]): Lower bound for green color in HSV format (H, S, V).\n green_color_upper_bound (Tuple[int, int, int]): Upper bound for green color in HSV format (H, S, V).\n ocr_config (str): Configuration string for Tesseract OCR (e.g., '--psm 10 -c tessedit_char_whitelist=0123456789').\n min_contour_area (int): The minimum area (in pixels) for a contour to be considered a number, used to filter out noise.\n\n Returns:\n Dict[str, List[int]]: A dictionary with two keys, 'red_numbers' and 'green_numbers', each containing a list of the integers extracted for that color.\n \"\"\"\n try:\n image = cv2.imread(image_path)\n if image is None:\n raise FileNotFoundError(f\"Image not found at path: {image_path}\")\n\n hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)\n\n # Create masks for red and green colors\n red_mask = cv2.inRange(hsv_image, red_color_lower_bound, red_color_upper_bound)\n green_mask = cv2.inRange(hsv_image, green_color_lower_bound, green_color_upper_bound)\n\n def _extract_from_mask(mask: np.ndarray) -> List[int]:\n \"\"\"Helper function to extract numbers from a given color mask.\"\"\"\n numbers = []\n contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n\n # Sort contours top-to-bottom, then left-to-right for consistent order\n if contours:\n bounding_boxes = [cv2.boundingRect(c) for c in contours]\n (contours, _) = zip(*sorted(zip(contours, bounding_boxes),\n key=lambda b: (b[1][1], b[1][0])))\n\n for contour in contours:\n if cv2.contourArea(contour) < min_contour_area:\n 
continue\n\n x, y, w, h = cv2.boundingRect(contour)\n \n # Add padding to ROI to prevent cropping number edges\n padding = 5\n roi = image[max(0, y - padding):min(image.shape[0], y + h + padding), \n max(0, x - padding):min(image.shape[1], x + w + padding)]\n\n if roi.size == 0:\n continue\n \n # Pre-process ROI for better OCR accuracy\n gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)\n # Apply thresholding to get a clear black and white image\n _, thresh_roi = cv2.threshold(gray_roi, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)\n\n text = pytesseract.image_to_string(thresh_roi, config=ocr_config).strip()\n \n cleaned_text = ''.join(filter(str.isdigit, text))\n if cleaned_text:\n numbers.append(int(cleaned_text))\n return numbers\n\n red_numbers = _extract_from_mask(red_mask)\n green_numbers = _extract_from_mask(green_mask)\n\n return {\n \"red_numbers\": red_numbers,\n \"green_numbers\": green_numbers\n }\n except Exception as e:\n # In a real application, you might want to log the error.\n # For this tool, returning a descriptive error string is sufficient.\n return {\"error\": f\"An error occurred: {str(e)}\"}\n```",
481-
"created_at": "2025-08-01T18:06:32.493499",
482-
"usage_count": 4,
483-
"last_used": "2025-08-02T06:49:08.127031"
484-
},
485459
{
486460
"name": "calculate_deviation_average",
487461
"description": "Takes two lists of numbers (red and green). It calculates the population standard deviation for the red numbers and the sample standard deviation for the green numbers using Python's 'statistics' module. It then returns the average of these two deviation values, rounded to three decimal points.",

src/mcp/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ async def register_tools(script_info_path):
6868
logger.info(f"Script info file not found: {script_info_path}")
6969
except json.JSONDecodeError:
7070
logger.error(f"Error decoding JSON from script info file: {script_info_path}")
71+
except Exception as e:
72+
logger.error(f"An unexpected error occurred while registering tools: {e}")
7173

7274
logger.info("All tools registered successfully.")
7375

src/models/models.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,14 @@ def _register_openai_models(self, use_local_proxy: bool = False):
154154
# deep research
155155
model_name = "o3-deep-research"
156156
model_id = "o3-deep-research"
157-
client = AsyncOpenAI(
157+
158+
model = RestfulResponseModel(
159+
api_base=self._check_local_api_base(local_api_base_name="SKYWORK_SHUBIAOBIAO_API_BASE",
160+
remote_api_base_name="OPENAI_API_BASE"),
158161
api_key=api_key,
159-
base_url=self._check_local_api_base(local_api_base_name="SKYWORK_API_BASE",
160-
remote_api_base_name="SKYWORK_API_BASE"),
161-
http_client=ASYNC_HTTP_CLIENT,
162-
)
163-
model = LiteLLMModel(
162+
api_type="responses",
164163
model_id=model_id,
165-
http_client=client,
164+
http_client=HTTP_CLIENT,
166165
custom_role_conversions=custom_role_conversions,
167166
)
168167
self.registed_models[model_name] = model

0 commit comments

Comments
 (0)