Skip to content

Commit c66245a

Browse files
seanzhougoogle, copybara-github
authored and committed
feat: support context caching
1. add a context cache config in app level which will apply to all agents in the app 2. pass on cache config through invocation context to llm_request 3. store cache metadata in llm_response 4. look up old cache metadata from the latest event for reusing the old cache 5. create a new cache if the old cache cannot be reused PiperOrigin-RevId: 809158578
1 parent 13a95c4 commit c66245a

20 files changed

+3234
-7
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from pydantic import BaseModel
18+
from pydantic import ConfigDict
19+
from pydantic import Field
20+
21+
from ..utils.feature_decorator import experimental
22+
23+
24+
@experimental
class ContextCacheConfig(BaseModel):
  """App-level switch and tuning knobs for LLM context caching.

  Attaching an instance of this config to an app turns context caching on
  for every LLM agent in that app; leaving it absent (``None``) keeps
  caching disabled. Reusing previously processed context can significantly
  reduce cost and response time across consecutive requests.

  Attributes:
    cache_intervals: Maximum number of invocations to reuse the same cache
      before refreshing it.
    ttl_seconds: Time-to-live for cache in seconds.
    min_tokens: Minimum tokens required to enable caching.
  """

  # Reject unknown keys so config typos fail loudly instead of being ignored.
  model_config = ConfigDict(
      extra="forbid",
  )

  cache_intervals: int = Field(
      default=10,
      ge=1,
      le=100,
      description=(
          "Maximum number of invocations to reuse the same cache before"
          " refreshing it"
      ),
  )

  ttl_seconds: int = Field(
      default=1800,  # 30 minutes
      gt=0,
      description="Time-to-live for cache in seconds",
  )

  min_tokens: int = Field(
      default=0,
      ge=0,
      description=(
          "Minimum estimated request tokens required to enable caching. This"
          " compares against the estimated total tokens of the request (system"
          " instruction + tools + contents). Context cache storage may have"
          " cost. Set higher to avoid caching small requests where overhead may"
          " exceed benefits."
      ),
  )

  @property
  def ttl_string(self) -> str:
    """TTL rendered in the string form expected by cache-creation APIs."""
    return str(self.ttl_seconds) + "s"

  def __str__(self) -> str:
    """Compact single-line form for log messages."""
    return "ContextCacheConfig(cache_intervals={}, ttl={}s, min_tokens={})".format(
        self.cache_intervals, self.ttl_seconds, self.min_tokens
    )

src/google/adk/agents/invocation_context.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
from typing import Optional
18+
from typing import TYPE_CHECKING
1819
import uuid
1920

2021
from google.genai import types
@@ -33,6 +34,7 @@
3334
from ..utils.feature_decorator import working_in_progress
3435
from .active_streaming_tool import ActiveStreamingTool
3536
from .base_agent import BaseAgent
37+
from .context_cache_config import ContextCacheConfig
3638
from .live_request_queue import LiveRequestQueue
3739
from .run_config import RunConfig
3840
from .transcription_entry import TranscriptionEntry
@@ -141,6 +143,7 @@ class InvocationContext(BaseModel):
141143
session_service: BaseSessionService
142144
memory_service: Optional[BaseMemoryService] = None
143145
credential_service: Optional[BaseCredentialService] = None
146+
context_cache_config: Optional[ContextCacheConfig] = None
144147

145148
invocation_id: str
146149
"""The id of this invocation context. Readonly."""

src/google/adk/apps/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from pydantic import Field
2121

2222
from ..agents.base_agent import BaseAgent
23+
from ..agents.context_cache_config import ContextCacheConfig
2324
from ..apps.base_events_compactor import BaseEventsCompactor
2425
from ..plugins.base_plugin import BasePlugin
2526
from ..utils.feature_decorator import experimental
@@ -53,3 +54,6 @@ class App(BaseModel):
5354

5455
event_compactor: Optional[BaseEventsCompactor] = None
5556
"""The event compactor strategy for the application."""
57+
58+
context_cache_config: Optional[ContextCacheConfig] = None
59+
"""Context cache configuration that applies to all LLM agents in the app."""
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Context cache processor for LLM requests."""
16+
17+
from __future__ import annotations
18+
19+
import logging
20+
from typing import AsyncGenerator
21+
from typing import Optional
22+
from typing import TYPE_CHECKING
23+
24+
from ...events.event import Event
25+
from ...models.cache_metadata import CacheMetadata
26+
from ._base_llm_processor import BaseLlmRequestProcessor
27+
28+
if TYPE_CHECKING:
29+
from ...agents.invocation_context import InvocationContext
30+
from ...models.llm_request import LlmRequest
31+
32+
logger = logging.getLogger('google_adk.' + __name__)
33+
34+
35+
class ContextCacheRequestProcessor(BaseLlmRequestProcessor):
  """Request processor that wires context caching into LLM requests.

  For agents whose app carries a context cache config, this processor copies
  that config onto the outgoing request and attaches the most recent cache
  metadata found in the session's events. Actual cache creation and refresh
  are delegated to the model-specific cache managers
  (e.g., GeminiContextCacheManager).
  """

  async def run_async(
      self, invocation_context: 'InvocationContext', llm_request: 'LlmRequest'
  ) -> AsyncGenerator[Event, None]:
    """Prepare the LLM request for context caching.

    Args:
      invocation_context: Invocation context containing agent and session info
      llm_request: Request to process for caching

    Yields:
      Event: No events are yielded by this processor
    """
    cache_config = invocation_context.context_cache_config
    if not cache_config:
      # Caching is disabled for this app; leave the request untouched.
      return

    llm_request.cache_config = cache_config

    agent = invocation_context.agent
    prior_metadata = self._find_latest_cache_metadata(
        invocation_context, agent.name, invocation_context.invocation_id
    )
    if prior_metadata:
      llm_request.cache_metadata = prior_metadata
      logger.debug(
          'Found cache metadata for agent %s: invocations_used=%d, '
          'cached_contents=%d',
          agent.name,
          prior_metadata.invocations_used,
          prior_metadata.cached_contents_count,
      )

    logger.debug('Context caching enabled for agent %s', agent.name)

    # Nothing to emit — the unreachable yield below keeps this function an
    # async generator as required by the processor interface.
    return
    yield

  def _find_latest_cache_metadata(
      self,
      invocation_context: 'InvocationContext',
      agent_name: str,
      current_invocation_id: str,
  ) -> Optional[CacheMetadata]:
    """Return the newest cache metadata this agent wrote into the session.

    Args:
      invocation_context: Context containing session with events
      agent_name: Name of agent to find cache metadata for
      current_invocation_id: Current invocation ID to compare for increment

    Returns:
      Latest cache metadata for the agent (with updated invocations_used
      if needed), or None if not found
    """
    session = invocation_context.session
    if not session or not session.events:
      return None

    # Walk the events newest-first; the first matching event wins.
    for event in reversed(session.events):
      if event.author != agent_name or event.cache_metadata is None:
        continue

      metadata = event.cache_metadata
      if event.invocation_id and event.invocation_id != current_invocation_id:
        # Cache was last touched in a different invocation, so count this
        # invocation against its reuse budget.
        return metadata.model_copy(
            update={'invocations_used': metadata.invocations_used + 1}
        )
      # Same invocation (or no invocation id recorded): reuse as-is.
      return metadata

    return None
125+
126+
127+
# Module-level singleton registered in the llm_flows processor pipelines
# (e.g., SingleFlow); the processor is stateless, so one shared instance
# suffices.
request_processor = ContextCacheRequestProcessor()

src/google/adk/flows/llm_flows/single_flow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from . import _output_schema_processor
2424
from . import basic
2525
from . import contents
26+
from . import context_cache_processor
2627
from . import identity
2728
from . import instructions
2829
from . import request_confirmation
@@ -48,6 +49,8 @@ def __init__(self):
4849
instructions.request_processor,
4950
identity.request_processor,
5051
contents.request_processor,
52+
# Context cache processor sets up cache config and finds existing cache metadata
53+
context_cache_processor.request_processor,
5154
# Some implementations of NL Planning mark planning contents as thoughts
5255
# in the post processor. Since these need to be unmarked, NL Planning
5356
# should be after contents.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import time
18+
from typing import Optional
19+
20+
from pydantic import BaseModel
21+
from pydantic import ConfigDict
22+
from pydantic import Field
23+
24+
25+
class CacheMetadata(BaseModel):
  """Identification and lifecycle information for one context-cache instance.

  Records which cached content an LLM response relied on, how many
  invocations have reused it, and when it expires. Token counts (cached and
  total) live in ``LlmResponse.usage_metadata`` and are deliberately not
  duplicated here.

  Attributes:
    cache_name: The full resource name of the cached content (e.g.,
      'projects/123/locations/us-central1/cachedContents/456')
    expire_time: Unix timestamp when the cache expires
    fingerprint: Hash of agent configuration (instruction + tools + model)
    invocations_used: Number of invocations this cache has been used for
    cached_contents_count: Number of contents stored in this cache
    created_at: Unix timestamp when the cache was created
  """

  # Frozen: the metadata is an immutable snapshot — derived updates go
  # through model_copy(update=...). Unknown keys are rejected.
  model_config = ConfigDict(
      extra="forbid",
      frozen=True,
  )

  cache_name: str = Field(
      description="Full resource name of the cached content"
  )

  expire_time: float = Field(description="Unix timestamp when cache expires")

  fingerprint: str = Field(
      description="Hash of agent configuration used to detect changes"
  )

  invocations_used: int = Field(
      ge=0,
      description="Number of invocations this cache has been used for",
  )

  cached_contents_count: int = Field(
      ge=0,
      description="Number of contents stored in this cache",
  )

  created_at: Optional[float] = Field(
      default=None,
      description=(
          "Unix timestamp when cache was created (None if reused existing)"
      ),
  )

  @property
  def expire_soon(self) -> bool:
    """True when less than two minutes remain before the cache expires."""
    # 2-minute buffer leaves headroom for request processing time.
    return self.expire_time - time.time() < 120

  def __str__(self) -> str:
    """Short human-readable summary for logging and debugging."""
    short_id = self.cache_name.rsplit("/", 1)[-1]
    minutes_left = (self.expire_time - time.time()) / 60
    return (
        "Cache {}: used {} invocations, cached {} contents, "
        "expires in {:.1f}min".format(
            short_id,
            self.invocations_used,
            self.cached_contents_count,
            minutes_left,
        )
    )

0 commit comments

Comments
 (0)