Skip to content

Commit 204701c

Browse files
committed
Add prompts caching + working examples
1 parent eafd8df commit 204701c

File tree

4 files changed

+192
-15
lines changed

4 files changed

+192
-15
lines changed

examples/mcp/caching/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Caching Example
2+
3+
This example shows how to integrate tools and prompts caching using a Streamable HTTP server in [server.py](server.py).
4+
5+
Run the example via:
6+
7+
```
8+
uv run python examples/mcp/caching/main.py
9+
```
10+
11+
## Details
12+
13+
The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The server runs in a subprocess at `http://localhost:8000/mcp`.

examples/mcp/caching/main.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import asyncio
2+
import os
3+
import shutil
4+
import subprocess
5+
import time
6+
from typing import Any
7+
8+
from agents import gen_trace_id, trace
9+
from agents.mcp import MCPServerStreamableHttp
10+
11+
12+
async def run(mcp_server: MCPServerStreamableHttp):
    """Show the tools and prompts caches before and after the first fetch.

    The caches (`_tools_list` / `_prompts_list`) start as `None` and are only
    populated by the first `list_tools()` / `list_prompts()` call.
    """
    # Before the first call to list_tools(), the tools cache is empty.
    print("Cached tools before invoking list_tools")
    print(mcp_server._tools_list)
    await mcp_server.list_tools()
    print("Cached tool names after invoking list_tools")
    cached_tools_list = mcp_server._tools_list
    for tool in cached_tools_list:
        print(f"name: {tool.name}")

    # Same idea for prompts: the cache fills on the first list_prompts() call.
    print("Cached prompts before invoking list_prompts")
    print(mcp_server._prompts_list)
    await mcp_server.list_prompts()
    print("\nCached prompts after invoking list_prompts")
    cached_prompts_list = mcp_server._prompts_list
    for prompt in cached_prompts_list.prompts:
        print(f"name: {prompt.name}")
28+
29+
async def main():
    """Run the caching example against the local server, inside a trace."""
    connection_params = {
        "url": "http://localhost:8000/mcp",
    }
    async with MCPServerStreamableHttp(
        name="Streamable HTTP Python Server",
        cache_tools_list=True,
        cache_prompts_list=True,
        params=connection_params,
    ) as server:
        trace_id = gen_trace_id()
        with trace(workflow_name="Caching Example", trace_id=trace_id):
            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}\n")
            await run(server)
42+
43+
44+
if __name__ == "__main__":
    # This example shells out to `uv`, so fail fast if it isn't installed.
    if shutil.which("uv") is None:
        raise RuntimeError(
            "uv is not installed. Please install it: https://docs.astral.sh/uv/getting-started/installation/"
        )

    # We'll run the Streamable HTTP server in a subprocess. Usually this would be a remote server, but for this
    # demo, we'll run it locally at http://localhost:8000/mcp
    process: subprocess.Popen[Any] | None = None
    try:
        server_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "server.py")

        print("Starting Streamable HTTP server at http://localhost:8000/mcp ...")

        # Launch the server with `uv run server.py`, then give it a moment to boot.
        process = subprocess.Popen(["uv", "run", server_file])
        time.sleep(3)

        print("Streamable HTTP server started. Running example...\n\n")
    except Exception as e:
        print(f"Error starting Streamable HTTP server: {e}")
        exit(1)

    # Always terminate the child server, even if the example raises.
    try:
        asyncio.run(main())
    finally:
        if process:
            process.terminate()

examples/mcp/caching/server.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import random
2+
3+
import requests
4+
from mcp.server.fastmcp import FastMCP
5+
6+
# Create server
7+
mcp = FastMCP("Echo Server")
8+
9+
10+
@mcp.tool()
def add(a: int, b: int) -> int:
    """Return the sum of two integers."""
    print(f"[debug-server] add({a}, {b})")
    result = a + b
    return result
15+
16+
17+
@mcp.tool()
def get_secret_word() -> str:
    """Return one randomly chosen secret word."""
    print("[debug-server] get_secret_word()")
    words = ["apple", "banana", "cherry"]
    return random.choice(words)
21+
22+
23+
@mcp.tool()
def get_current_weather(city: str) -> str:
    """Return the current weather report for `city` from wttr.in."""
    print(f"[debug-server] get_current_weather({city})")

    endpoint = "https://wttr.in"
    # A timeout keeps the tool call from hanging indefinitely if wttr.in is
    # slow or unreachable; requests.get blocks forever without one.
    response = requests.get(f"{endpoint}/{city}", timeout=10)
    return response.text
30+
31+
@mcp.prompt()
def system_prompt() -> str:
    """System prompt directing the model to rely on the available tools."""
    return "Use the tools to answer the questions."
34+
35+
36+
if __name__ == "__main__":
    # Serve over the streamable HTTP transport so clients connect at /mcp.
    mcp.run(transport="streamable-http")

src/agents/mcp/server.py

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
8484
def __init__(
8585
self,
8686
cache_tools_list: bool,
87+
cache_prompts_list: bool,
8788
client_session_timeout_seconds: float | None,
8889
tool_filter: ToolFilter = None,
8990
):
@@ -96,20 +97,30 @@ def __init__(
9697
server will not change its tools list, because it can drastically improve latency
9798
(by avoiding a round-trip to the server every time).
9899
100+
cache_prompts_list: Whether to cache the prompts list. If `True`, the prompts list will be
101+
cached and only fetched from the server once. If `False`, the prompts list will be
102+
fetched from the server on each call to `list_prompts()`. The cache can be invalidated
103+
by calling `invalidate_prompts_cache()`. You should set this to `True` if you know the
104+
server will not change its prompts list, because it can drastically improve latency
105+
(by avoiding a round-trip to the server every time).
106+
99107
client_session_timeout_seconds: the read timeout passed to the MCP ClientSession.
100108
tool_filter: The tool filter to use for filtering tools.
101109
"""
102110
self.session: ClientSession | None = None
103111
self.exit_stack: AsyncExitStack = AsyncExitStack()
104112
self._cleanup_lock: asyncio.Lock = asyncio.Lock()
105113
self.cache_tools_list = cache_tools_list
114+
self.cache_prompts_list = cache_prompts_list
106115
self.server_initialize_result: InitializeResult | None = None
107116

108117
self.client_session_timeout_seconds = client_session_timeout_seconds
109118

110-
# The cache is always dirty at startup, so that we fetch tools at least once
111-
self._cache_dirty = True
119+
# The cache is always dirty at startup, so that we fetch tools and prompts at least once
120+
self._cache_dirty_tools = True
112121
self._tools_list: list[MCPTool] | None = None
122+
self._cache_dirty_prompts = True
123+
self._prompts_list: ListPromptsResult | None = None
113124

114125
self.tool_filter = tool_filter
115126

@@ -213,7 +224,11 @@ async def __aexit__(self, exc_type, exc_value, traceback):
213224

214225
def invalidate_tools_cache(self):
215226
"""Invalidate the tools cache."""
216-
self._cache_dirty = True
227+
self._cache_dirty_tools = True
228+
229+
def invalidate_prompts_cache(self):
230+
"""Invalidate the prompts cache."""
231+
self._cache_dirty_prompts = True
217232

218233
async def connect(self):
219234
"""Connect to the server."""
@@ -251,11 +266,11 @@ async def list_tools(
251266
raise UserError("Server not initialized. Make sure you call `connect()` first.")
252267

253268
# Return from cache if caching is enabled, we have tools, and the cache is not dirty
254-
if self.cache_tools_list and not self._cache_dirty and self._tools_list:
269+
if self.cache_tools_list and not self._cache_dirty_tools and self._tools_list:
255270
tools = self._tools_list
256271
else:
257272
# Reset the cache dirty to False
258-
self._cache_dirty = False
273+
self._cache_dirty_tools = False
259274
# Fetch the tools from the server
260275
self._tools_list = (await self.session.list_tools()).tools
261276
tools = self._tools_list
async def list_prompts(self) -> "ListPromptsResult":
    """List the prompts available on the server.

    When `cache_prompts_list` is `True`, the result is served from
    `self._prompts_list` until `invalidate_prompts_cache()` marks the cache
    dirty again.

    Raises:
        UserError: If `connect()` has not been called yet.
    """
    if not self.session:
        raise UserError("Server not initialized. Make sure you call `connect()` first.")

    # Return from cache if caching is enabled, we have prompts, and the cache is not dirty
    if self.cache_prompts_list and not self._cache_dirty_prompts and self._prompts_list:
        prompts = self._prompts_list
    else:
        # Reset the cache dirty to False
        self._cache_dirty_prompts = False
        # Fetch the prompts from the server
        self._prompts_list = await self.session.list_prompts()
        # Fix: return the freshly fetched PROMPTS; the previous code returned
        # self._tools_list here, handing callers the tools cache instead.
        prompts = self._prompts_list

    return prompts
286310

287311
async def get_prompt(
288312
self, name: str, arguments: dict[str, Any] | None = None
@@ -343,6 +367,7 @@ def __init__(
343367
self,
344368
params: MCPServerStdioParams,
345369
cache_tools_list: bool = False,
370+
cache_prompts_list: bool = False,
346371
name: str | None = None,
347372
client_session_timeout_seconds: float | None = 5,
348373
tool_filter: ToolFilter = None,
@@ -354,21 +379,31 @@ def __init__(
354379
start the server, the args to pass to the command, the environment variables to
355380
set for the server, the working directory to use when spawning the process, and
356381
the text encoding used when sending/receiving messages to the server.
382+
357383
cache_tools_list: Whether to cache the tools list. If `True`, the tools list will be
358384
cached and only fetched from the server once. If `False`, the tools list will be
359385
fetched from the server on each call to `list_tools()`. The cache can be
360386
invalidated by calling `invalidate_tools_cache()`. You should set this to `True`
361387
if you know the server will not change its tools list, because it can drastically
362388
improve latency (by avoiding a round-trip to the server every time).
389+
390+
cache_prompts_list: Whether to cache the prompts list. If `True`, the prompts list will be
391+
cached and only fetched from the server once. If `False`, the prompts list will be
392+
fetched from the server on each call to `list_prompts()`. The cache can be invalidated
393+
by calling `invalidate_prompts_cache()`. You should set this to `True` if you know the
394+
server will not change its prompts list, because it can drastically improve latency
395+
(by avoiding a round-trip to the server every time).
396+
363397
name: A readable name for the server. If not provided, we'll create one from the
364398
command.
365399
client_session_timeout_seconds: the read timeout passed to the MCP ClientSession.
366400
tool_filter: The tool filter to use for filtering tools.
367401
"""
368402
super().__init__(
369-
cache_tools_list,
370-
client_session_timeout_seconds,
371-
tool_filter,
403+
cache_tools_list=cache_tools_list,
404+
cache_prompts_list=cache_prompts_list,
405+
client_session_timeout_seconds=client_session_timeout_seconds,
406+
tool_filter=tool_filter,
372407
)
373408

374409
self.params = StdioServerParameters(
@@ -426,6 +461,7 @@ def __init__(
426461
self,
427462
params: MCPServerSseParams,
428463
cache_tools_list: bool = False,
464+
cache_prompts_list: bool = False,
429465
name: str | None = None,
430466
client_session_timeout_seconds: float | None = 5,
431467
tool_filter: ToolFilter = None,
@@ -444,16 +480,24 @@ def __init__(
444480
if you know the server will not change its tools list, because it can drastically
445481
improve latency (by avoiding a round-trip to the server every time).
446482
483+
cache_prompts_list: Whether to cache the prompts list. If `True`, the prompts list will be
484+
cached and only fetched from the server once. If `False`, the prompts list will be
485+
fetched from the server on each call to `list_prompts()`. The cache can be invalidated
486+
by calling `invalidate_prompts_cache()`. You should set this to `True` if you know the
487+
server will not change its prompts list, because it can drastically improve latency
488+
(by avoiding a round-trip to the server every time).
489+
447490
name: A readable name for the server. If not provided, we'll create one from the
448491
URL.
449492
450493
client_session_timeout_seconds: the read timeout passed to the MCP ClientSession.
451494
tool_filter: The tool filter to use for filtering tools.
452495
"""
453496
super().__init__(
454-
cache_tools_list,
455-
client_session_timeout_seconds,
456-
tool_filter,
497+
cache_tools_list=cache_tools_list,
498+
cache_prompts_list=cache_prompts_list,
499+
client_session_timeout_seconds=client_session_timeout_seconds,
500+
tool_filter=tool_filter,
457501
)
458502

459503
self.params = params
@@ -511,6 +555,7 @@ def __init__(
511555
self,
512556
params: MCPServerStreamableHttpParams,
513557
cache_tools_list: bool = False,
558+
cache_prompts_list: bool = False,
514559
name: str | None = None,
515560
client_session_timeout_seconds: float | None = 5,
516561
tool_filter: ToolFilter = None,
@@ -530,16 +575,24 @@ def __init__(
530575
if you know the server will not change its tools list, because it can drastically
531576
improve latency (by avoiding a round-trip to the server every time).
532577
578+
cache_prompts_list: Whether to cache the prompts list. If `True`, the prompts list will be
579+
cached and only fetched from the server once. If `False`, the prompts list will be
580+
fetched from the server on each call to `list_prompts()`. The cache can be invalidated
581+
by calling `invalidate_prompts_cache()`. You should set this to `True` if you know the
582+
server will not change its prompts list, because it can drastically improve latency
583+
(by avoiding a round-trip to the server every time).
584+
533585
name: A readable name for the server. If not provided, we'll create one from the
534586
URL.
535587
536588
client_session_timeout_seconds: the read timeout passed to the MCP ClientSession.
537589
tool_filter: The tool filter to use for filtering tools.
538590
"""
539591
super().__init__(
540-
cache_tools_list,
541-
client_session_timeout_seconds,
542-
tool_filter,
592+
cache_tools_list=cache_tools_list,
593+
cache_prompts_list=cache_prompts_list,
594+
client_session_timeout_seconds=client_session_timeout_seconds,
595+
tool_filter=tool_filter,
543596
)
544597

545598
self.params = params

0 commit comments

Comments
 (0)