@@ -62,9 +62,11 @@ async def run_async(
62
62
# Set cache config to request
63
63
llm_request .cache_config = invocation_context .context_cache_config
64
64
65
- # Find latest cache metadata from session events
66
- latest_cache_metadata = self ._find_latest_cache_metadata (
67
- invocation_context , agent .name , invocation_context .invocation_id
65
+ # Find latest cache metadata and previous token count from session events
66
+ latest_cache_metadata , previous_token_count = (
67
+ self ._find_cache_info_from_events (
68
+ invocation_context , agent .name , invocation_context .invocation_id
69
+ )
68
70
)
69
71
70
72
if latest_cache_metadata :
@@ -77,51 +79,78 @@ async def run_async(
77
79
latest_cache_metadata .cached_contents_count ,
78
80
)
79
81
82
+ if previous_token_count is not None :
83
+ llm_request .cacheable_contents_token_count = previous_token_count
84
+ logger .debug (
85
+ 'Found previous prompt token count for agent %s: %d' ,
86
+ agent .name ,
87
+ previous_token_count ,
88
+ )
89
+
80
90
logger .debug ('Context caching enabled for agent %s' , agent .name )
81
91
82
92
# This processor yields no events
83
93
return
84
94
yield # AsyncGenerator requires a yield in function body
85
95
86
def _find_cache_info_from_events(
    self,
    invocation_context: 'InvocationContext',
    agent_name: str,
    current_invocation_id: str,
) -> tuple[Optional[CacheMetadata], Optional[int]]:
  """Find cache metadata and previous prompt token count in session events.

  The session's event list is walked from its last element to its first.
  Only events whose ``author`` equals ``agent_name`` are considered. The two
  results are looked up independently, so they may come from different
  events; the scan stops as soon as both have been found.

  Args:
    invocation_context: Context containing the session with events.
    agent_name: Name of the agent to find cache info for.
    current_invocation_id: Current invocation ID, compared against the
      event's invocation ID to decide whether ``invocations_used`` must be
      incremented.

  Returns:
    Tuple of ``(cache_metadata, previous_prompt_token_count)``:
      cache_metadata: The first non-None ``cache_metadata`` met during the
        backwards walk. If that event carries a truthy ``invocation_id``
        different from ``current_invocation_id``, a copy with
        ``invocations_used`` incremented by one is returned instead of the
        stored object. None if no matching event has cache metadata.
      previous_prompt_token_count: The first non-None
        ``usage_metadata.prompt_token_count`` met during the backwards walk
        (0 is a valid value). None if no matching event has one.
    ``(None, None)`` is returned when there is no session or no events.
  """
  session = invocation_context.session
  if not session or not session.events:
    return None, None

  cache_metadata = None
  previous_token_count = None

  # reversed() walks the list back-to-front without index arithmetic.
  for event in reversed(session.events):
    if event.author != agent_name:
      continue

    # Keep only the first cache metadata met (i.e. the last one in the list).
    if cache_metadata is None and event.cache_metadata is not None:
      cache_metadata = event.cache_metadata
      if event.invocation_id and event.invocation_id != current_invocation_id:
        # Metadata was recorded under a different invocation: count this
        # invocation as one more use. model_copy leaves the stored event
        # object untouched.
        cache_metadata = cache_metadata.model_copy(
            update={'invocations_used': cache_metadata.invocations_used + 1}
        )
      # Otherwise (same invocation, or no invocation_id) it is used as-is.

    # Keep only the first prompt token count met; compare against None so a
    # count of 0 is still accepted.
    usage_metadata = event.usage_metadata
    if (
        previous_token_count is None
        and usage_metadata
        and usage_metadata.prompt_token_count is not None
    ):
      previous_token_count = usage_metadata.prompt_token_count

    # Stop early once both pieces of information have been found.
    if cache_metadata is not None and previous_token_count is not None:
      break

  return cache_metadata, previous_token_count
125
154
126
155
127
156
# Create processor instance for use in flows
0 commit comments