Skip to content

Commit 3bb882f

Browse files
Add cache_complete_prompt option for caching complete prompts in LLM responses for Claude.
1 parent 3fd1e59 commit 3bb882f

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

src/agentlab/agents/tool_use_agent/multi_tool_agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,10 @@ def get_action(self, obs: Any) -> float:
392392
self.config.summarizer.apply(self.llm, self.messages)
393393
logging.info("Main tool calling")
394394
response: LLMOutput = self.llm(
395-
messages=self.messages, tool_choice="any", cache_tool_definition=True
395+
messages=self.messages,
396+
tool_choice="any",
397+
cache_tool_definition=True,
398+
cache_complete_prompt=True,
396399
)
397400
logging.info(f"Obtained response {response}")
398401

src/agentlab/llm/response_api.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,9 @@ def _call_api(
537537
if kwargs.pop("cache_tool_definition", False):
538538
# Indicating cache control for the last tool enables caching of all previous tool definitions.
539539
api_params["tools"][-1]["cache_control"] = {"type": "ephemeral"}
540+
if kwargs.pop("cache_complete_prompt", False):
541+
# Indicating cache control for the last message enables caching of the complete prompt.
542+
api_params["messages"][-1]["content"][-1]["cache_control"] = {"type": "ephemeral"}
540543
if self.extra_kwargs.get("reasoning", None) is not None:
541544
api_params["reasoning"] = self.extra_kwargs["reasoning"]
542545

0 commit comments

Comments (0)