diff --git a/config/agent_gaia-validation-gpt5.yaml b/config/agent_gaia-validation-gpt5.yaml
new file mode 100644
index 00000000..f0d78f18
--- /dev/null
+++ b/config/agent_gaia-validation-gpt5.yaml
@@ -0,0 +1,79 @@
+defaults:
+  - benchmark: gaia-validation
+  - override hydra/job_logging: none
+  - _self_  # Allow defining variables at the top of this file
+
+
+main_agent:
+  prompt_class: MainAgentPrompt_GAIA
+  llm: 
+    provider_class: "GPT5OpenAIClient"
+    model_name: "gpt-5"
+    async_client: true
+    temperature: 1.0
+    top_p: 1.0
+    min_p: 0.0
+    top_k: -1
+    max_tokens: 128000
+    reasoning_effort: "high"
+    openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+    openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+    openrouter_provider: ""
+    disable_cache_control: true
+    keep_tool_result: -1
+    oai_tool_thinking: false
+  
+  tool_config:
+    - tool-reasoning
+
+  max_turns: -1  # Maximum number of turns for main agent execution
+  max_tool_calls_per_turn: 10  # Maximum number of tool calls per turn
+  
+  input_process:
+    hint_generation: true
+    hint_llm_base_url: "${oc.env:HINT_LLM_BASE_URL,https://api.openai.com/v1}"
+  output_process:
+    final_answer_extraction: true
+    final_answer_llm_base_url: "${oc.env:FINAL_ANSWER_LLM_BASE_URL,https://api.openai.com/v1}"
+
+  openai_api_key: "${oc.env:OPENAI_API_KEY,???}" # used for hint generation and final answer extraction
+  add_message_id: true
+  keep_tool_result: -1
+  chinese_context: "${oc.env:CHINESE_CONTEXT,false}"
+
+
+sub_agents:
+  agent-worker:
+    prompt_class: SubAgentWorkerPrompt
+    llm: 
+      provider_class: "GPT5OpenAIClient"
+      model_name: "gpt-5"
+      async_client: true
+      temperature: 1.0
+      top_p: 1.0
+      min_p: 0.0
+      top_k: -1
+      max_tokens: 128000
+      reasoning_effort: "medium"
+      openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+      openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+      openrouter_provider: ""
+      disable_cache_control: true
+      keep_tool_result: -1
+      oai_tool_thinking: false
+    
+    tool_config:
+      - tool-searching
+      - tool-image-video
+      - tool-reading
+      - tool-code
+      - tool-audio
+
+    max_turns: -1  # Maximum number of turns for sub-agent execution
+    max_tool_calls_per_turn: 10  # Maximum number of tool calls per turn
+
+
+# Can define some top-level or default parameters here
+output_dir: logs/
+data_dir: "${oc.env:DATA_DIR,data}"  # Points to where data is stored
+
diff --git a/config/tool/tool-reading.yaml b/config/tool/tool-reading.yaml
index 78fcb0e1..5038c4c6 100644
--- a/config/tool/tool-reading.yaml
+++ b/config/tool/tool-reading.yaml
@@ -2,4 +2,7 @@ name: "tool-reading"
 tool_command: "python"
 args:
   - "-m"
-  - "src.tool.mcp_servers.reading_mcp_server"
\ No newline at end of file
+  - "src.tool.mcp_servers.reading_mcp_server"
+env:
+  SERPER_API_KEY: "${oc.env:SERPER_API_KEY}"
+  JINA_API_KEY: "${oc.env:JINA_API_KEY}"
\ No newline at end of file
diff --git a/docs/mkdocs/docs/gaia_validation_gpt5.md b/docs/mkdocs/docs/gaia_validation_gpt5.md
new file mode 100644
index 00000000..1607e477
--- /dev/null
+++ b/docs/mkdocs/docs/gaia_validation_gpt5.md
@@ -0,0 +1,56 @@
+# GAIA Validation - GPT-5
+
+MiroFlow now supports GPT-5 with MCP tool invocation, providing a unified workflow for multi-step reasoning, information integration, and scalable tool coordination.
+
+!!! info "Prerequisites"
+    Before proceeding, please review the [GAIA Validation Prerequisites](gaia_validation_prerequisites.md) document, which covers common setup requirements, dataset preparation, and API key configuration.
+
+---
+
+## Running the Evaluation
+
+### Step 1: Dataset Preparation
+
+Follow the [dataset preparation instructions](gaia_validation_prerequisites.md#dataset-preparation) in the prerequisites document.
+
+### Step 2: API Keys Configuration
+
+Configure the following API keys in your `.env` file:
+
+```env title="GPT-5 .env Configuration"
+# Search and web scraping capabilities
+SERPER_API_KEY="your-serper-api-key"
+JINA_API_KEY="your-jina-api-key"
+
+# Code execution environment
+E2B_API_KEY="your-e2b-api-key"
+
+# Vision understanding capabilities
+ANTHROPIC_API_KEY="your-anthropic-api-key"
+GEMINI_API_KEY="your-gemini-api-key"
+
+# Primary LLM provider, LLM judge, reasoning, and hint generation
+OPENAI_API_KEY="your-openai-api-key"
+OPENAI_BASE_URL="https://api.openai.com/v1"
+
+```
+
+### Step 3: Run the Evaluation
+
+Execute the evaluation using the GPT-5 configuration:
+
+```bash title="Run GAIA Validation with GPT-5"
+uv run main.py common-benchmark \
+  --config_file_name=agent_gaia-validation-gpt5 \
+  output_dir="logs/gaia-validation-gpt5/$(date +"%Y%m%d_%H%M")"
+```
+
+### Step 4: Monitor Progress
+
+Follow the [progress monitoring instructions](gaia_validation_prerequisites.md#progress-monitoring-and-resume) in the prerequisites document.
+
+
+---
+
+!!! info "Documentation Info"
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
diff --git a/docs/mkdocs/docs/llm_clients_overview.md b/docs/mkdocs/docs/llm_clients_overview.md
index b9894c58..08f3064d 100644
--- a/docs/mkdocs/docs/llm_clients_overview.md
+++ b/docs/mkdocs/docs/llm_clients_overview.md
@@ -9,6 +9,7 @@ MiroFlow supports multiple LLM providers through a unified client interface. Eac
 | `ClaudeAnthropicClient` | Anthropic Direct | claude-3-7-sonnet | `ANTHROPIC_API_KEY`, `ANTHROPIC_BASE_URL` |
 | `ClaudeOpenRouterClient` | OpenRouter | anthropic/claude-3.7-sonnet, and other [supported models](https://openrouter.ai/models) | `OPENROUTER_API_KEY`, `OPENROUTER_BASE_URL` |
 | `GPTOpenAIClient` | OpenAI | gpt-4, gpt-3.5 | `OPENAI_API_KEY`, `OPENAI_BASE_URL` |
+| `GPT5OpenAIClient` | OpenAI | gpt-5 | `OPENAI_API_KEY`, `OPENAI_BASE_URL` |
 | `MiroThinkerSGLangClient` | SGLang | MiroThinker series | `OAI_MIROTHINKER_API_KEY`, `OAI_MIROTHINKER_BASE_URL` |
 
 ## Basic Configuration
@@ -31,4 +32,4 @@ main_agent:
 ---
 
 !!! info "Documentation Info"
-    **Last Updated:** September 2025 · **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
diff --git a/docs/mkdocs/docs/openai-gpt.md b/docs/mkdocs/docs/openai-gpt.md
index c3636913..5e181271 100644
--- a/docs/mkdocs/docs/openai-gpt.md
+++ b/docs/mkdocs/docs/openai-gpt.md
@@ -1,8 +1,45 @@
 # OpenAI GPT Models
 
-OpenAI's latest models including GPT-4o and advanced reasoning models with strong coding, vision, and reasoning capabilities.
+OpenAI's latest models, including GPT-5, GPT-4o, and advanced reasoning models, with strong coding, vision, and reasoning capabilities.
 
-## Client Used
+## Client Used for GPT-5
+
+`GPT5OpenAIClient`
+
+## Environment Setup
+
+```bash title="Environment Variables"
+export OPENAI_API_KEY="your-openai-key"
+export OPENAI_BASE_URL="https://api.openai.com/v1"  # optional
+```
+
+## Configuration
+
+```yaml title="Agent Configuration"
+main_agent:
+  llm: 
+    provider_class: "GPT5OpenAIClient"
+    model_name: "gpt-5"
+    async_client: true
+    temperature: 1.0
+    top_p: 1.0
+    min_p: 0.0
+    top_k: -1
+    max_tokens: 128000
+    reasoning_effort: "high" # Use high in the main agent, and use the default medium in the sub-agent.
+    openai_api_key: "${oc.env:OPENAI_API_KEY,???}"
+    openai_base_url: "${oc.env:OPENAI_BASE_URL,https://api.openai.com/v1}"
+```
+
+## Usage
+
+```bash title="Example Command"
+# Run a trace with your custom GPT-5 config
+uv run main.py trace --config_file_name=your_config_file \
+    --task="Your task" --task_file_name="data/file.txt"
+```
+
+## Client Used for GPT-4o
 
 `GPTOpenAIClient`
 
@@ -32,7 +69,10 @@ uv run main.py trace --config_file_name=your_config_file \
     --task="Your task" --task_file_name="data/file.txt"
 ```
 
+!!! note "Configuration Notes"
+    - `GPTOpenAIClient` also supports GPT-5, but it has not been fully validated on MiroFlow yet. We recommend using `GPT5OpenAIClient`.
+
 ---
 
 !!! info "Documentation Info"
-    **Last Updated:** September 2025 · **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
\ No newline at end of file
diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml
index a6db0940..7f9f7840 100644
--- a/docs/mkdocs/mkdocs.yml
+++ b/docs/mkdocs/mkdocs.yml
@@ -52,6 +52,7 @@ nav:
       - GAIA-Validation:
         - Prerequisites: gaia_validation_prerequisites.md
         - Claude-3.7-Sonnet: gaia_validation_claude37sonnet.md
+        - GPT-5: gaia_validation_gpt5.md
         - MiroThinker: gaia_validation_mirothinker.md
       - GAIA-Validation-Text-Only: gaia_validation_text_only.md
       - GAIA-Test: gaia_test.md
diff --git a/src/llm/provider_client_base.py b/src/llm/provider_client_base.py
index 5d07fcec..9333054e 100644
--- a/src/llm/provider_client_base.py
+++ b/src/llm/provider_client_base.py
@@ -43,6 +43,7 @@ def __post_init__(self):
         self.top_p: float = self.cfg.llm.top_p
         self.min_p: float = self.cfg.llm.min_p
         self.top_k: int = self.cfg.llm.top_k
+        self.reasoning_effort: str = self.cfg.llm.get("reasoning_effort", "medium")
         self.repetition_penalty: float = self.cfg.llm.get("repetition_penalty", 1.0)
         self.max_tokens: int = self.cfg.llm.max_tokens
         self.max_context_length: int = self.cfg.llm.get("max_context_length", -1)
diff --git a/src/llm/providers/claude_openrouter_client.py b/src/llm/providers/claude_openrouter_client.py
index b95fd031..54d1eff3 100644
--- a/src/llm/providers/claude_openrouter_client.py
+++ b/src/llm/providers/claude_openrouter_client.py
@@ -411,4 +411,4 @@ def _apply_cache_control(self, messages):
             else:
                 # Other messages add directly
                 cached_messages.append(turn)
-        return list(reversed(cached_messages))
+        return list(reversed(cached_messages))
\ No newline at end of file
diff --git a/src/llm/providers/gpt5_openai_client.py b/src/llm/providers/gpt5_openai_client.py
new file mode 100644
index 00000000..a71c9014
--- /dev/null
+++ b/src/llm/providers/gpt5_openai_client.py
@@ -0,0 +1,412 @@
+# SPDX-FileCopyrightText: 2025 MiromindAI
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import asyncio
+import dataclasses
+import json
+import os
+import re
+from typing import Any, Dict, List
+
+import tiktoken
+from omegaconf import DictConfig
+from openai import AsyncOpenAI, OpenAI
+from tenacity import (
+    retry,
+    retry_if_not_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
+
+from src.llm.provider_client_base import LLMProviderClientBase
+
+from src.logging.logger import bootstrap_logger
+
+LOGGER_LEVEL = os.getenv("LOGGER_LEVEL", "INFO")
+logger = bootstrap_logger(level=LOGGER_LEVEL)
+
+
+class ContextLimitError(Exception):
+    pass
+
+
+@dataclasses.dataclass
+class GPT5OpenAIClient(LLMProviderClientBase):
+    def _create_client(self, config: DictConfig):
+        """Create configured OpenAI client"""
+        if self.async_client:
+            return AsyncOpenAI(
+                api_key=self.cfg.llm.openai_api_key,
+                base_url=self.cfg.llm.openai_base_url,
+                timeout=1800,
+            )
+        else:
+            return OpenAI(
+                api_key=self.cfg.llm.openai_api_key,
+                base_url=self.cfg.llm.openai_base_url,
+                timeout=1800,
+            )
+
+    @retry(
+        wait=wait_exponential(multiplier=5),
+        stop=stop_after_attempt(5),
+        retry=retry_if_not_exception_type(ContextLimitError),
+    )
+    async def _create_message(
+        self,
+        system_prompt: str,
+        messages: List[Dict[str, Any]],
+        tools_definitions,
+        keep_tool_result: int = -1,
+    ):
+        """
+        Send message to OpenAI API.
+        :param system_prompt: System prompt string.
+        :param messages: Message history list.
+        :return: OpenAI API response object or None (if error).
+        """
+        logger.debug(f" Calling LLM ({'async' if self.async_client else 'sync'})")
+        # put the system prompt in the first message since OpenAI API does not support system prompt in
+        if system_prompt:
+            target_role = "system"
+
+            # Check if there are already system or developer messages
+            if messages and messages[0]["role"] in ["system", "developer"]:
+                # Replace existing message with correct role
+                messages[0] = {
+                    "role": target_role,
+                    "content": [dict(type="text", text=system_prompt)],
+                }
+            else:
+                # Insert new message
+                messages.insert(
+                    0,
+                    {
+                        "role": target_role,
+                        "content": [dict(type="text", text=system_prompt)],
+                    },
+                )
+
+        messages_copy = self._remove_tool_result_from_messages(
+            messages, keep_tool_result
+        )
+
+        # Apply cache control
+        if self.disable_cache_control:
+            processed_messages = messages_copy
+        else:
+            processed_messages = self._apply_cache_control(messages_copy)
+
+        params = None
+        try:
+            temperature = self.temperature
+
+            # build extra_body if self.openrouter_provider
+            provider_config = (self.openrouter_provider or "").strip().lower()
+            logger.info(f"provider_config: {provider_config}")
+            if provider_config == "google":
+                extra_body = {
+                    "provider": {
+                        "only": [
+                            "google-vertex/us",
+                            "google-vertex/europe",
+                            "google-vertex/global",
+                        ]
+                    }
+                }
+            elif provider_config == "anthropic":
+                extra_body = {"provider": {"only": ["anthropic"]}}
+                # extra_body["provider"]["ignore"] = ["google-vertex/us", "google-vertex/europe", "google-vertex/global"]
+            elif provider_config == "amazon":
+                extra_body = {"provider": {"only": ["amazon-bedrock"]}}
+            elif provider_config != "":
+                extra_body = {"provider": {"only": [provider_config]}}
+            else:
+                extra_body = {}
+
+            # Add top_k and min_p through extra_body for OpenRouter
+            if self.top_k != -1:
+                extra_body["top_k"] = self.top_k
+            if self.min_p != 0.0:
+                extra_body["min_p"] = self.min_p
+            if self.repetition_penalty != 1.0:
+                extra_body["repetition_penalty"] = self.repetition_penalty
+            
+            assert self.model_name in ["gpt-5-2025-08-07", "gpt-5"]
+            params = {
+                "model": self.model_name,
+                "temperature": temperature,
+                "max_completion_tokens": self.max_tokens,
+                "messages": processed_messages,
+                "stream": False,
+                "extra_body": extra_body,
+                "reasoning_effort": self.reasoning_effort,
+            }
+
+            # Add optional parameters only if they have non-default values
+            if self.top_p != 1.0:
+                params["top_p"] = self.top_p
+
+            response = await self._create_completion(params, self.async_client)
+
+            if (
+                response is None
+                or response.choices is None
+                or len(response.choices) == 0
+            ):
+                logger.debug(f"LLM call failed: response = {response}")
+                raise Exception(f"LLM call failed [rare case]: response = {response}")
+
+            if response.choices and response.choices[0].finish_reason == "length":
+                logger.debug(
+                    "LLM finish_reason is 'length', triggering ContextLimitError"
+                )
+                raise ContextLimitError(
+                    "(finish_reason=length) Response truncated due to maximum context length"
+                )
+
+            if (
+                response.choices
+                and response.choices[0].finish_reason == "stop"
+                and response.choices[0].message.content.strip() == ""
+            ):
+                logger.debug(
+                    "LLM finish_reason is 'stop', but content is empty, triggering Error"
+                )
+                raise Exception("LLM finish_reason is 'stop', but content is empty")
+
+            logger.debug(
+                f"LLM call finish_reason: {getattr(response.choices[0], 'finish_reason', 'N/A')}"
+            )
+            return response
+        except asyncio.CancelledError:
+            logger.debug("[WARNING] LLM API call was cancelled during execution")
+            raise Exception("LLM API call was cancelled during execution")
+        except Exception as e:
+            error_str = str(e)
+            if (
+                "Input is too long for requested model" in error_str
+                or "input length and `max_tokens` exceed context limit" in error_str
+                or "maximum context length" in error_str
+                or "prompt is too long" in error_str
+                or "exceeds the maximum length" in error_str
+                or "exceeds the maximum allowed length" in error_str
+                or "Input tokens exceed the configured limit" in error_str
+            ):
+                logger.debug(f"OpenRouter LLM Context limit exceeded: {error_str}")
+                raise ContextLimitError(f"Context limit exceeded: {error_str}")
+
+            logger.error(
+                f"OpenRouter LLM call failed: {str(e)}, input = {json.dumps(params)}",
+                exc_info=True,
+            )
+            raise e
+
+    async def _create_completion(self, params: Dict[str, Any], is_async: bool):
+        """Helper to create a completion, handling async and sync calls."""
+        if is_async:
+            return await self.client.chat.completions.create(**params)
+        else:
+            return self.client.chat.completions.create(**params)
+
+    def _clean_user_content_from_response(self, text: str) -> str:
+        """Remove content between \\n\\nUser: and <use_mcp_tool> in assistant response (if no <use_mcp_tool>, remove to end)"""
+        # Match content between \n\nUser: and <use_mcp_tool>, if no <use_mcp_tool> delete to text end
+        pattern = r"\n\nUser:.*?(?=<use_mcp_tool>|$)"
+        cleaned_text = re.sub(pattern, "", text, flags=re.MULTILINE | re.DOTALL)
+
+        return cleaned_text
+
+    def process_llm_response(
+        self, llm_response, message_history, agent_type="main"
+    ) -> tuple[str, bool]:
+        """Process OpenAI LLM response"""
+
+        if not llm_response or not llm_response.choices:
+            error_msg = "LLM did not return a valid response."
+            logger.error(f"Should never happen: {error_msg}")
+            return "", True  # Exit loop
+
+        # Extract LLM response text
+        if llm_response.choices[0].finish_reason == "stop":
+            assistant_response_text = llm_response.choices[0].message.content or ""
+            # remove user: {...} content
+            assistant_response_text = self._clean_user_content_from_response(
+                assistant_response_text
+            )
+            message_history.append(
+                {"role": "assistant", "content": assistant_response_text}
+            )
+        elif llm_response.choices[0].finish_reason == "length":
+            assistant_response_text = llm_response.choices[0].message.content or ""
+            if assistant_response_text == "":
+                assistant_response_text = "LLM response is empty. This is likely due to thinking block used up all tokens."
+            else:
+                assistant_response_text = self._clean_user_content_from_response(
+                    assistant_response_text
+                )
+            message_history.append(
+                {"role": "assistant", "content": assistant_response_text}
+            )
+        else:
+            logger.error(
+                f"Unsupported finish reason: {llm_response.choices[0].finish_reason}"
+            )
+            assistant_response_text = (
+                "Successful response, but unsupported finish reason: "
+                + llm_response.choices[0].finish_reason
+            )
+            message_history.append(
+                {"role": "assistant", "content": assistant_response_text}
+            )
+        logger.debug(f"LLM Response: {assistant_response_text}")
+
+        return assistant_response_text, False
+
+    def extract_tool_calls_info(self, llm_response, assistant_response_text):
+        """Extract tool call information from OpenAI LLM response"""
+        from src.utils.parsing_utils import parse_llm_response_for_tool_calls
+
+        # For Anthropic, parse tool calls from response text
+        return parse_llm_response_for_tool_calls(assistant_response_text)
+
+    def update_message_history(
+        self, message_history, tool_call_info, tool_calls_exceeded=False
+    ):
+        """Update message history with tool calls data (llm client specific)"""
+
+        # Filter tool call results with type "text"
+        tool_call_info = [item for item in tool_call_info if item[1]["type"] == "text"]
+
+        # Separate valid tool calls and bad tool calls
+        valid_tool_calls = [
+            (tool_id, content)
+            for tool_id, content in tool_call_info
+            if tool_id != "FAILED"
+        ]
+        bad_tool_calls = [
+            (tool_id, content)
+            for tool_id, content in tool_call_info
+            if tool_id == "FAILED"
+        ]
+
+        total_calls = len(valid_tool_calls) + len(bad_tool_calls)
+
+        # Build output text
+        output_parts = []
+
+        if total_calls > 1:
+            # Handling for multiple tool calls
+            # Add tool result description
+            if tool_calls_exceeded:
+                output_parts.append(
+                    f"You made too many tool calls. I can only afford to process {len(valid_tool_calls)} valid tool calls in this turn."
+                )
+            else:
+                output_parts.append(
+                    f"I have processed {len(valid_tool_calls)} valid tool calls in this turn."
+                )
+
+            # Output each valid tool call result according to format
+            for i, (tool_id, content) in enumerate(valid_tool_calls, 1):
+                output_parts.append(f"Valid tool call {i} result:\n{content['text']}")
+
+            # Output bad tool calls results
+            for i, (tool_id, content) in enumerate(bad_tool_calls, 1):
+                output_parts.append(f"Failed tool call {i} result:\n{content['text']}")
+        else:
+            # For single tool call, output result directly
+            for tool_id, content in valid_tool_calls:
+                output_parts.append(content["text"])
+            for tool_id, content in bad_tool_calls:
+                output_parts.append(content["text"])
+
+        merged_text = "\n\n".join(output_parts)
+
+        message_history.append(
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": merged_text}],
+            }
+        )
+        return message_history
+
+    def parse_llm_response(self, llm_response) -> str:
+        """Parse OpenAI LLM response to get text content"""
+        if not llm_response or not llm_response.choices:
+            raise ValueError("LLM did not return a valid response.")
+        return llm_response.choices[0].message.content
+
+    def _estimate_tokens(self, text: str) -> int:
+        """Use tiktoken to estimate token count of text"""
+        if not hasattr(self, "encoding"):
+            # Initialize tiktoken encoder
+            try:
+                self.encoding = tiktoken.get_encoding("o200k_base")
+            except Exception:
+                # If o200k_base is not available, use cl100k_base as fallback
+                self.encoding = tiktoken.get_encoding("cl100k_base")
+
+        try:
+            return len(self.encoding.encode(text))
+        except Exception:
+            # If encoding fails, use simple estimation: about 1 token per 4 characters
+            return len(text) // 4
+
+    def handle_max_turns_reached_summary_prompt(self, message_history, summary_prompt):
+        """Handle max turns reached summary prompt"""
+        if message_history[-1]["role"] == "user":
+            last_user_message = message_history.pop()
+            return (
+                last_user_message["content"][0]["text"]
+                + "\n\n-----------------\n\n"
+                + summary_prompt
+            )
+        else:
+            return summary_prompt
+
+    def _apply_cache_control(self, messages):
+        """Apply cache control to the last user message and system message (if applicable)"""
+        cached_messages = []
+        user_turns_processed = 0
+        for turn in reversed(messages):
+            if (turn["role"] == "user" and user_turns_processed < 1) or (
+                turn["role"] == "system"
+            ):
+                # Add ephemeral cache control to the text part of the last user message
+                new_content = []
+                processed_text = False
+                # Check if content is a list
+                if isinstance(turn.get("content"), list):
+                    # see example here
+                    # https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+                    for item in turn["content"]:
+                        if (
+                            item.get("type") == "text"
+                            and len(item.get("text")) > 0
+                            and not processed_text
+                        ):
+                            # Copy and add cache control
+                            text_item = item.copy()
+                            text_item["cache_control"] = {"type": "ephemeral"}
+                            new_content.append(text_item)
+                            processed_text = True
+                        else:
+                            # Other types of content (like image) copy directly
+                            new_content.append(item.copy())
+                    cached_messages.append(
+                        {"role": turn["role"], "content": new_content}
+                    )
+                else:
+                    # If content is not a list (e.g., plain text), add as is without cache control
+                    # Or adjust logic as needed
+                    logger.debug(
+                        "Warning: User message content is not in expected list format, cache control not applied."
+                    )
+                    cached_messages.append(turn)
+                user_turns_processed += 1
+            else:
+                # Other messages add directly
+                cached_messages.append(turn)
+        return list(reversed(cached_messages))
diff --git a/src/llm/providers/gpt_openai_client.py b/src/llm/providers/gpt_openai_client.py
index b404e5f2..46d3d325 100644
--- a/src/llm/providers/gpt_openai_client.py
+++ b/src/llm/providers/gpt_openai_client.py
@@ -18,7 +18,7 @@
 
 LOGGER_LEVEL = os.getenv("LOGGER_LEVEL", "INFO")
 # OPENAI reasoning models only support temperature=1
-OPENAI_REASONING_MODEL_SET = set(["o1", "o3", "o3-mini", "o4-mini"])
+OPENAI_REASONING_MODEL_SET = set(["o1", "o3", "o3-mini", "o4-mini", "gpt-5", "gpt-5-2025-08-07"])
 
 logger = bootstrap_logger(level=LOGGER_LEVEL)
 
@@ -29,13 +29,15 @@ def _create_client(self, config: DictConfig):
         """Create configured OpenAI client"""
         if self.async_client:
             return AsyncOpenAI(
-                api_key=config.env.openai_api_key,
-                base_url=config.env.openai_base_url,
+                api_key=self.cfg.llm.openai_api_key,
+                base_url=self.cfg.llm.openai_base_url,
+                timeout=1800,
             )
         else:
             return OpenAI(
-                api_key=config.env.openai_api_key,
-                base_url=config.env.openai_base_url,
+                api_key=self.cfg.llm.openai_api_key,
+                base_url=self.cfg.llm.openai_base_url,
+                timeout=1800,
             )
 
     @retry(wait=wait_fixed(10), stop=stop_after_attempt(5))
@@ -58,6 +60,7 @@ async def _create_message(
             or self.model_name.startswith("o4")
             or self.model_name.startswith("gpt-4.1")
             or self.model_name.startswith("gpt-4o")
+            or self.model_name.startswith("gpt-5")
         )
         logger.debug(f" Calling LLM ({'async' if self.async_client else 'sync'})")
         # put the system prompt in the first message since OpenAI API does not support system prompt in
@@ -88,21 +91,28 @@ async def _create_message(
         tool_list = await self.convert_tool_definition_to_tool_call(tools_definitions)
 
         try:
-            # Set temperature=1 for reasoning models
-            temperature = (
-                1.0
-                if self.model_name in OPENAI_REASONING_MODEL_SET
-                else self.temperature
-            )
-
-            params = {
-                "model": self.model_name,
-                "temperature": temperature,
-                "max_completion_tokens": self.max_tokens,
-                "messages": messages_copy,
-                "tools": tool_list,
-                "stream": False,
-            }
+            # Set temperature and reasoning_effort for reasoning models
+            if self.model_name in OPENAI_REASONING_MODEL_SET:
+                temperature = 1.0
+                params = {
+                    "model": self.model_name,
+                    "temperature": temperature,
+                    "max_completion_tokens": self.max_tokens,
+                    "messages": messages_copy,
+                    "reasoning_effort": self.reasoning_effort,
+                    "tools": tool_list,
+                    "stream": False,
+                }
+            else:
+                temperature = self.temperature
+                params = {
+                    "model": self.model_name,
+                    "temperature": temperature,
+                    "max_completion_tokens": self.max_tokens,
+                    "messages": messages_copy,
+                    "tools": tool_list,
+                    "stream": False,
+                }
 
             if self.top_p != 1.0:
                 params["top_p"] = self.top_p
diff --git a/src/tool/mcp_servers/vision_mcp_server.py b/src/tool/mcp_servers/vision_mcp_server.py
index 8b816f60..24e1c775 100755
--- a/src/tool/mcp_servers/vision_mcp_server.py
+++ b/src/tool/mcp_servers/vision_mcp_server.py
@@ -87,6 +87,10 @@ async def call_claude_vision(image_path_or_url: str, question: str) -> str:
     ]
 
     try:
+        from urllib.parse import urlparse, unquote
+        parsed = urlparse(image_path_or_url)
+        if parsed.scheme == "file":
+            image_path_or_url = unquote(parsed.path)
         if os.path.exists(image_path_or_url):  # Check if the file exists locally
             with open(image_path_or_url, "rb") as image_file:
                 image_data = base64.b64encode(image_file.read()).decode("utf-8")