Skip to content

Commit c66245a

Browse files
seanzhougoogle, copybara-github
authored and committed
feat: support context caching
1. add a context cache config in app level which will apply to all agents in the app 2. pass on cache config through invocation context to llm_request 3. store cache metadata in llm_response 4. look up old cache metadata from the latest event for reusing the old cache 5. create a new cache if the old cache cannot be reused PiperOrigin-RevId: 809158578
1 parent 13a95c4 commit c66245a

20 files changed

+3234
-7
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from pydantic import BaseModel
18+
from pydantic import ConfigDict
19+
from pydantic import Field
20+
21+
from ..utils.feature_decorator import experimental
22+
23+
24+
@experimental
class ContextCacheConfig(BaseModel):
  """App-level switch and tuning knobs for LLM context caching.

  Attaching an instance of this config to an app turns context caching on
  for every LLM agent in that app; leaving it absent (``None``) keeps
  caching disabled. Reusing previously processed context can significantly
  reduce cost and response time across consecutive requests.

  Attributes:
    cache_intervals: Maximum number of invocations to reuse the same cache
      before refreshing it.
    ttl_seconds: Time-to-live for cache in seconds.
    min_tokens: Minimum tokens required to enable caching.
  """

  # Reject unknown keys so config typos fail loudly instead of being ignored.
  model_config = ConfigDict(
      extra="forbid",
  )

  cache_intervals: int = Field(
      default=10,
      ge=1,
      le=100,
      description=(
          "Maximum number of invocations to reuse the same cache before"
          " refreshing it"
      ),
  )

  ttl_seconds: int = Field(
      default=1800,  # 30 minutes
      gt=0,
      description="Time-to-live for cache in seconds",
  )

  min_tokens: int = Field(
      default=0,
      ge=0,
      description=(
          "Minimum estimated request tokens required to enable caching. This"
          " compares against the estimated total tokens of the request (system"
          " instruction + tools + contents). Context cache storage may have"
          " cost. Set higher to avoid caching small requests where overhead may"
          " exceed benefits."
      ),
  )

  @property
  def ttl_string(self) -> str:
    """TTL rendered in the string form expected by cache-creation APIs."""
    return str(self.ttl_seconds) + "s"

  def __str__(self) -> str:
    """Compact single-line form for log messages."""
    return "ContextCacheConfig(cache_intervals={}, ttl={}s, min_tokens={})".format(
        self.cache_intervals, self.ttl_seconds, self.min_tokens
    )

src/google/adk/agents/invocation_context.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
from typing import Optional
18+
from typing import TYPE_CHECKING
1819
import uuid
1920

2021
from google.genai import types
@@ -33,6 +34,7 @@
3334
from ..utils.feature_decorator import working_in_progress
3435
from .active_streaming_tool import ActiveStreamingTool
3536
from .base_agent import BaseAgent
37+
from .context_cache_config import ContextCacheConfig
3638
from .live_request_queue import LiveRequestQueue
3739
from .run_config import RunConfig
3840
from .transcription_entry import TranscriptionEntry
@@ -141,6 +143,7 @@ class InvocationContext(BaseModel):
141143
session_service: BaseSessionService
142144
memory_service: Optional[BaseMemoryService] = None
143145
credential_service: Optional[BaseCredentialService] = None
146+
context_cache_config: Optional[ContextCacheConfig] = None
144147

145148
invocation_id: str
146149
"""The id of this invocation context. Readonly."""

src/google/adk/apps/app.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from pydantic import Field
2121

2222
from ..agents.base_agent import BaseAgent
23+
from ..agents.context_cache_config import ContextCacheConfig
2324
from ..apps.base_events_compactor import BaseEventsCompactor
2425
from ..plugins.base_plugin import BasePlugin
2526
from ..utils.feature_decorator import experimental
@@ -53,3 +54,6 @@ class App(BaseModel):
5354

5455
event_compactor: Optional[BaseEventsCompactor] = None
5556
"""The event compactor strategy for the application."""
57+
58+
context_cache_config: Optional[ContextCacheConfig] = None
59+
"""Context cache configuration that applies to all LLM agents in the app."""
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Context cache processor for LLM requests."""
16+
17+
from __future__ import annotations
18+
19+
import logging
20+
from typing import AsyncGenerator
21+
from typing import Optional
22+
from typing import TYPE_CHECKING
23+
24+
from ...events.event import Event
25+
from ...models.cache_metadata import CacheMetadata
26+
from ._base_llm_processor import BaseLlmRequestProcessor
27+
28+
if TYPE_CHECKING:
29+
from ...agents.invocation_context import InvocationContext
30+
from ...models.llm_request import LlmRequest
31+
32+
logger = logging.getLogger('google_adk.' + __name__)
33+
34+
35+
class ContextCacheRequestProcessor(BaseLlmRequestProcessor):
  """Request processor that wires context caching into LLM requests.

  For agents whose app carries a context cache config, this processor copies
  that config onto the outgoing request and attaches the most recent cache
  metadata found in the session's events. Actual cache creation and refresh
  are delegated to the model-specific cache managers
  (e.g., GeminiContextCacheManager).
  """

  async def run_async(
      self, invocation_context: 'InvocationContext', llm_request: 'LlmRequest'
  ) -> AsyncGenerator[Event, None]:
    """Prepare the LLM request for context caching.

    Args:
      invocation_context: Invocation context containing agent and session info
      llm_request: Request to process for caching

    Yields:
      Event: No events are yielded by this processor
    """
    cache_config = invocation_context.context_cache_config
    if not cache_config:
      # Caching is disabled for this app; leave the request untouched.
      return

    llm_request.cache_config = cache_config

    agent = invocation_context.agent
    prior_metadata = self._find_latest_cache_metadata(
        invocation_context, agent.name, invocation_context.invocation_id
    )
    if prior_metadata:
      llm_request.cache_metadata = prior_metadata
      logger.debug(
          'Found cache metadata for agent %s: invocations_used=%d, '
          'cached_contents=%d',
          agent.name,
          prior_metadata.invocations_used,
          prior_metadata.cached_contents_count,
      )

    logger.debug('Context caching enabled for agent %s', agent.name)

    # Nothing to emit — the unreachable yield below keeps this function an
    # async generator as required by the processor interface.
    return
    yield

  def _find_latest_cache_metadata(
      self,
      invocation_context: 'InvocationContext',
      agent_name: str,
      current_invocation_id: str,
  ) -> Optional[CacheMetadata]:
    """Return the newest cache metadata this agent wrote into the session.

    Args:
      invocation_context: Context containing session with events
      agent_name: Name of agent to find cache metadata for
      current_invocation_id: Current invocation ID to compare for increment

    Returns:
      Latest cache metadata for the agent (with updated invocations_used
      if needed), or None if not found
    """
    session = invocation_context.session
    if not session or not session.events:
      return None

    # Walk the events newest-first; the first matching event wins.
    for event in reversed(session.events):
      if event.author != agent_name or event.cache_metadata is None:
        continue

      metadata = event.cache_metadata
      if event.invocation_id and event.invocation_id != current_invocation_id:
        # Cache was last touched in a different invocation, so count this
        # invocation against its reuse budget.
        return metadata.model_copy(
            update={'invocations_used': metadata.invocations_used + 1}
        )
      # Same invocation (or no invocation id recorded): reuse as-is.
      return metadata

    return None
125+
126+
127+
# Module-level singleton registered in the llm_flows processor pipelines
# (e.g., SingleFlow); the processor is stateless, so one shared instance
# suffices.
request_processor = ContextCacheRequestProcessor()

src/google/adk/flows/llm_flows/single_flow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from . import _output_schema_processor
2424
from . import basic
2525
from . import contents
26+
from . import context_cache_processor
2627
from . import identity
2728
from . import instructions
2829
from . import request_confirmation
@@ -48,6 +49,8 @@ def __init__(self):
4849
instructions.request_processor,
4950
identity.request_processor,
5051
contents.request_processor,
52+
# Context cache processor sets up cache config and finds existing cache metadata
53+
context_cache_processor.request_processor,
5154
# Some implementations of NL Planning mark planning contents as thoughts
5255
# in the post processor. Since these need to be unmarked, NL Planning
5356
# should be after contents.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import time
18+
from typing import Optional
19+
20+
from pydantic import BaseModel
21+
from pydantic import ConfigDict
22+
from pydantic import Field
23+
24+
25+
class CacheMetadata(BaseModel):
  """Identification and lifecycle information for one context-cache instance.

  Records which cached content an LLM response relied on, how many
  invocations have reused it, and when it expires. Token counts (cached and
  total) live in ``LlmResponse.usage_metadata`` and are deliberately not
  duplicated here.

  Attributes:
    cache_name: The full resource name of the cached content (e.g.,
      'projects/123/locations/us-central1/cachedContents/456')
    expire_time: Unix timestamp when the cache expires
    fingerprint: Hash of agent configuration (instruction + tools + model)
    invocations_used: Number of invocations this cache has been used for
    cached_contents_count: Number of contents stored in this cache
    created_at: Unix timestamp when the cache was created
  """

  # Frozen: the metadata is an immutable snapshot — derived updates go
  # through model_copy(update=...). Unknown keys are rejected.
  model_config = ConfigDict(
      extra="forbid",
      frozen=True,
  )

  cache_name: str = Field(
      description="Full resource name of the cached content"
  )

  expire_time: float = Field(description="Unix timestamp when cache expires")

  fingerprint: str = Field(
      description="Hash of agent configuration used to detect changes"
  )

  invocations_used: int = Field(
      ge=0,
      description="Number of invocations this cache has been used for",
  )

  cached_contents_count: int = Field(
      ge=0,
      description="Number of contents stored in this cache",
  )

  created_at: Optional[float] = Field(
      default=None,
      description=(
          "Unix timestamp when cache was created (None if reused existing)"
      ),
  )

  @property
  def expire_soon(self) -> bool:
    """True when less than two minutes remain before the cache expires."""
    # 2-minute buffer leaves headroom for request processing time.
    return self.expire_time - time.time() < 120

  def __str__(self) -> str:
    """Short human-readable summary for logging and debugging."""
    short_id = self.cache_name.rsplit("/", 1)[-1]
    minutes_left = (self.expire_time - time.time()) / 60
    return (
        "Cache {}: used {} invocations, cached {} contents, "
        "expires in {:.1f}min".format(
            short_id,
            self.invocations_used,
            self.cached_contents_count,
            minutes_left,
        )
    )

0 commit comments

Comments
 (0)