Skip to content
5 changes: 5 additions & 0 deletions .changeset/fast-mcp-direct-call.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"gradio": patch
---

fix: bypass HTTP loopback for non-queued MCP tool calls, calling `blocks.process_api()` directly to reduce latency
48 changes: 34 additions & 14 deletions gradio/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from gradio.blocks import BlockFunction
from gradio.components import State
from gradio.route_utils import Header
from gradio.state_holder import SessionState

if TYPE_CHECKING:
from mcp import types # noqa: F401
Expand Down Expand Up @@ -326,7 +327,7 @@ def _format_progress_message(update: StatusUpdate) -> str | None:
return None

async def _execute_tool_with_progress( # type: ignore
self, job: Any, progress_token: str
self, job: Any, progress_token: str | int
) -> dict[str, Any]:
"""
Execute a tool call with progress tracking (streaming path).
Expand Down Expand Up @@ -394,27 +395,46 @@ async def call_tool(
name: The name of the tool to call.
arguments: The arguments to pass to the tool.
"""
progress_token = None
if self.mcp_server.request_context.meta is not None:
progress_token = self.mcp_server.request_context.meta.progressToken

client = await run_sync(self._get_or_create_client)
endpoint_name, processed_args, request_headers, block_fn = (
self._prepare_tool_call_args(name, arguments)
)
processed_args = self.insert_empty_state(block_fn.inputs, processed_args)
job = client.submit(
*processed_args, api_name=endpoint_name, headers=request_headers
)

if progress_token is None or not block_fn.queue:
output_data = await self._execute_tool_without_progress(job)
if not block_fn.queue:
# Fast path for non-queued events: call blocks.process_api()
# directly instead of the HTTP loopback through gradio_client.
# This eliminates thread dispatches, TCP round-trips, and SSE
# overhead — reducing MCP tool-call latency significantly.
session_state = SessionState(self.blocks)
raw_output = await self.blocks.process_api(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should pass the request here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Added request=self.mcp_server.request_context.request to the process_api() call in a96be5a.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!

block_fn=block_fn,
inputs=processed_args,
state=session_state,
request=self.mcp_server.request_context.request,
)
output_data = raw_output["data"]
else:
output_data = await self._execute_tool_with_progress( # type: ignore
job,
progress_token, # type: ignore
# Queued path: use the HTTP loopback to preserve streaming
# updates, progress notifications, and queue-based features.
progress_token = None
if self.mcp_server.request_context.meta is not None:
progress_token = self.mcp_server.request_context.meta.progressToken

client = await run_sync(self._get_or_create_client)
job = client.submit(
*processed_args,
api_name=endpoint_name,
headers=request_headers,
)

if progress_token is None:
output_data = await self._execute_tool_without_progress(job)
else:
output_data = await self._execute_tool_with_progress(
job,
progress_token,
)

output_data = self.pop_returned_state(block_fn.outputs, output_data)

context_request: Request | None = self.mcp_server.request_context.request
Expand Down
1 change: 1 addition & 0 deletions guides/10_mcp/01_building-mcp-server-with-gradio.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Now, all you need to do is add this URL endpoint to your MCP Client (e.g. Claude

<video src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/gradio-guides/mcp_guide1.mp4" style="width:100%" controls preload> </video>

Tip: To minimize latency and increase throughput by as much as 10 times, set `queue=False` in the event handlers of your Gradio app. However, this disables progress notifications, so it's recommended that long-running events set `queue=True`.

## Converting an Existing Space

Expand Down
9 changes: 9 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,19 @@ def process(name: str, hidden_state: str, flag: bool, gallery_images):
output = gr.Textbox(label="Result")

btn = gr.Button("Process")
btn_nq = gr.Button("Process No Queue")
btn.click(
process,
inputs=[name_input, hidden_state, flag_input, gallery],
outputs=[output],
api_visibility="public",
)
btn_nq.click(
process,
inputs=[name_input, hidden_state, flag_input, gallery],
outputs=[output],
api_visibility="public",
api_name="no_queue",
queue=False,
)
return demo
22 changes: 20 additions & 2 deletions test/test_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ async def test_mcp_streamable_http_client_with_stateful_app(stateful_mcp_app):
await session.initialize()

tools_response = await session.list_tools()
assert len(tools_response.tools) == 1
tool = tools_response.tools[0]
assert len(tools_response.tools) == 2
tool, tool_nq = tools_response.tools

result = await session.call_tool(
tool.name,
Expand All @@ -366,6 +366,24 @@ async def test_mcp_streamable_http_client_with_stateful_app(stateful_mcp_app):
result.content[0].text # type: ignore
== "name=test, hidden_state=hidden_value, flag=True, gallery=42"
)
result = await session.call_tool(
tool_nq.name,
arguments={"name": "test_2", "flag": True, "gallery_images": 42},
)
assert len(result.content) == 1 # type: ignore
assert (
result.content[0].text # type: ignore
== "name=test_2, hidden_state=hidden_value, flag=True, gallery=42"
)
result = await session.call_tool(
tool_nq.name,
arguments={"name": "test_3", "flag": True, "gallery_images": 44},
)
assert len(result.content) == 1 # type: ignore
assert (
result.content[0].text # type: ignore
== "name=test_3, hidden_state=hidden_value, flag=True, gallery=44"
)
finally:
stateful_mcp_app.close()

Expand Down
Loading