Skip to content

Commit 11d9d2e

Browse files
ArthurHeymans and eca committed
Add tool-calling-with-thought-signatures integration test
- Add new test case that verifies Google's thought_signature is correctly captured and passed back in assistant messages with tool calls
- Add raw message capture helper (set-raw-messages!/get-raw-messages) to inspect the actual messages sent back to the LLM
- Add mock handler for tool calling flow with thought signatures
- Rename and re-enable previously commented-out tool-calling test

🤖 Generated with [eca](https://eca.dev)

Co-Authored-By: eca <[email protected]>
1 parent 858e9dd commit 11d9d2e

File tree

3 files changed

+152
-116
lines changed

3 files changed

+152
-116
lines changed

integration-test/integration/chat/google_test.clj

Lines changed: 94 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -173,111 +173,101 @@
173173
:instructions (m/pred string?)}
174174
(llm.mocks/get-req-body :reasoning-1)))))))
175175

176-
#_(deftest tool-calling
177-
(eca/start-process!)
176+
(deftest tool-calling-with-thought-signatures
177+
(eca/start-process!)
178178

179-
(eca/request! (fixture/initialize-request))
180-
(eca/notify! (fixture/initialized-notification))
181-
(let [chat-id* (atom nil)]
182-
(testing "We ask what files LLM see"
183-
(llm.mocks/set-case! :tool-calling-0)
184-
(let [0
185-
resp (eca/request! (fixture/chat-prompt-request
186-
{:model "google/gemini-2.5-pro"
187-
:message "What files you see?"}))
188-
chat-id (reset! chat-id* (:chatId resp))]
179+
(eca/request! (fixture/initialize-request))
180+
(eca/notify! (fixture/initialized-notification))
181+
(llm-mock.openai-chat/set-thinking-tag! "thought")
182+
(let [chat-id* (atom nil)]
183+
(testing "We ask what files LLM sees - tool call includes thought signature"
184+
(llm.mocks/set-case! :tool-calling-with-thought-signature-0)
185+
(let [resp (eca/request! (fixture/chat-prompt-request
186+
{:model "google/gemini-2.5-pro"
187+
:message "What files you see?"}))
188+
chat-id (reset! chat-id* (:chatId resp))]
189189

190-
(is (match?
191-
{:chatId (m/pred string?)
192-
:model "google/gemini-2.5-pro"
193-
:status "prompting"}
194-
resp))
190+
(is (match?
191+
{:chatId (m/pred string?)
192+
:model "google/gemini-2.5-pro"
193+
:status "prompting"}
194+
resp))
195+
196+
(match-content chat-id "user" {:type "text" :text "What files you see?\n"})
197+
(match-content chat-id "system" {:type "metadata" :title "Some Cool Title"})
198+
(match-content chat-id "system" {:type "progress" :state "running" :text "Waiting model"})
199+
(match-content chat-id "system" {:type "progress" :state "running" :text "Generating"})
200+
(match-content chat-id "assistant" {:type "reasonStarted" :id (m/pred string?)})
201+
;; Note: The buffering in process-text-think-aware keeps a 9-char tail to detect </thought>,
202+
;; so chunks get re-split during streaming. The mock sends "I s", "hould call tool", " eca__directory_tree"
203+
;; but after buffering we get these chunks:
204+
(match-content chat-id "assistant" {:type "reasonText" :id (m/pred string?) :text "I should "})
205+
(match-content chat-id "assistant" {:type "reasonText" :id (m/pred string?) :text "call tool eca__direc"})
206+
(match-content chat-id "assistant" {:type "reasonText" :id (m/pred string?) :text "tory_tree"})
207+
(match-content chat-id "assistant" {:type "reasonFinished" :id (m/pred string?) :totalTimeMs (m/pred number?)})
208+
;; Text is buffered (8-char tail for <thought> detection), then flushed when tool calls start
209+
(match-content chat-id "assistant" {:type "text" :text "I will li"})
210+
(match-content chat-id "assistant" {:type "text" :text "st files"})
211+
(match-content chat-id "assistant" {:type "toolCallPrepare"
212+
:origin "native"
213+
:id (m/pred string?)
214+
:name "directory_tree"
215+
:argumentsText ""
216+
:summary "Listing file tree"})
217+
(match-content chat-id "assistant" {:type "toolCallPrepare"
218+
:origin "native"
219+
:id (m/pred string?)
220+
:name "directory_tree"
221+
:argumentsText "{\"pat"
222+
:summary "Listing file tree"})
223+
(match-content chat-id "assistant" {:type "toolCallPrepare"
224+
:origin "native"
225+
:id (m/pred string?)
226+
:name "directory_tree"
227+
:argumentsText (str "h\":\"" (h/project-path->canon-path "resources") "\"}")
228+
:summary "Listing file tree"})
229+
(match-content chat-id "system" {:type "usage"})
230+
(match-content chat-id "assistant" {:type "toolCallRun"
231+
:origin "native"
232+
:id (m/pred string?)
233+
:name "directory_tree"
234+
:arguments {:path (h/project-path->canon-path "resources")}
235+
:manualApproval false
236+
:summary "Listing file tree"})
237+
(match-content chat-id "assistant" {:type "toolCallRunning"
238+
:origin "native"
239+
:id (m/pred string?)
240+
:name "directory_tree"
241+
:arguments {:path (h/project-path->canon-path "resources")}
242+
:summary "Listing file tree"})
243+
(match-content chat-id "system" {:type "progress" :state "running" :text "Calling tool"})
244+
(match-content chat-id "assistant" {:type "toolCalled"
245+
:origin "native"
246+
:id (m/pred string?)
247+
:name "directory_tree"
248+
:arguments {:path (h/project-path->canon-path "resources")}
249+
:summary "Listing file tree"
250+
:totalTimeMs (m/pred number?)
251+
:error false
252+
:outputs [{:type "text" :text (str (h/project-path->canon-path "resources") "\n"
253+
" file1.md\n"
254+
" file2.md\n\n"
255+
"0 directories, 2 files")}]})
256+
;; Text chunks get re-split due to 8-char tail buffering for <thought> detection.
257+
;; Note: the final text and progress events are matched sequentially below (no
258+
;; m/in-any-order matcher is used); the trailing usage event is not asserted here.
259+
(match-content chat-id "assistant" {:type "text" :text "The files"})
260+
(match-content chat-id "assistant" {:type "text" :text " I see:\nfile"})
261+
(match-content chat-id "assistant" {:type "text" :text "1\nfile2\n"})
262+
(match-content chat-id "system" {:type "progress" :state "finished"})
195263

196-
(match-content chat-id "user" {:type "text" :text "What files you see?\n"})
197-
(match-content chat-id "system" {:type "progress" :state "running" :text "Waiting model"})
198-
(match-content chat-id "system" {:type "progress" :state "running" :text "Generating"})
199-
(match-content chat-id "assistant" {:type "reasonStarted" :id (m/pred string?)})
200-
(match-content chat-id "assistant" {:type "reasonText" :id (m/pred string?) :text "I should call tool"})
201-
(match-content chat-id "assistant" {:type "reasonText" :id (m/pred string?) :text " eca__directory_tree"})
202-
(match-content chat-id "assistant" {:type "reasonFinished" :id (m/pred string?) :totalTimeMs (m/pred number?)})
203-
(match-content chat-id "assistant" {:type "text" :text "I will list files"})
204-
(match-content chat-id "assistant" {:type "toolCallPrepare"
205-
:origin "native"
206-
:id "tool-1"
207-
:name "directory_tree"
208-
:argumentsText ""
209-
:manualApproval false
210-
:summary "Listing file tree"})
211-
(match-content chat-id "assistant" {:type "toolCallPrepare"
212-
:origin "native"
213-
:id "tool-1"
214-
:name "directory_tree"
215-
:argumentsText "{\"pat"
216-
:manualApproval false
217-
:summary "Listing file tree"})
218-
(match-content chat-id "assistant" {:type "toolCallPrepare"
219-
:origin "native"
220-
:id "tool-1"
221-
:name "directory_tree"
222-
:argumentsText (str "h\":\"" (h/project-path->canon-path "resources") "\"}")
223-
:manualApproval false
224-
:summary "Listing file tree"})
225-
(match-content chat-id "system" {:type "usage"
226-
:messageInputTokens 5
227-
:messageOutputTokens 30
228-
:sessionTokens 35
229-
:messageCost (m/pred string?)
230-
:sessionCost (m/pred string?)})
231-
(match-content chat-id "assistant" {:type "toolCallRun"
232-
:origin "native"
233-
:id "tool-1"
234-
:name "directory_tree"
235-
:arguments {:path (h/project-path->canon-path "resources")}
236-
:manualApproval false
237-
:summary "Listing file tree"})
238-
(match-content chat-id "assistant" {:type "toolCallRunning"
239-
:origin "native"
240-
:id "tool-1"
241-
:name "directory_tree"
242-
:arguments {:path (h/project-path->canon-path "resources")}
243-
:totalTimeMs number?
244-
:summary "Listing file tree"})
245-
(match-content chat-id "assistant" {:type "toolCalled"
246-
:origin "native"
247-
:id "tool-1"
248-
:name "directory_tree"
249-
:arguments {:path (h/project-path->canon-path "resources")}
250-
:summary "Listing file tree"
251-
:error false
252-
:outputs [{:type "text" :text (str "[FILE] " (h/project-path->canon-path "resources/file1.md\n")
253-
"[FILE] " (h/project-path->canon-path "resources/file2.md\n"))}]})
254-
(match-content chat-id "assistant" {:type "text" :text "The files I see:\n"})
255-
(match-content chat-id "assistant" {:type "text" :text "file1\nfile2\n"})
256-
(match-content chat-id "system" {:type "usage"
257-
:messageInputTokens 5
258-
:messageOutputTokens 30
259-
:sessionTokens 70
260-
:messageCost (m/pred string?)
261-
:sessionCost (m/pred string?)})
262-
(match-content chat-id "system" {:type "progress" :state "finished"})
264+
;; Verify thought signature was passed back in the second request
265+
(let [raw-messages (llm.mocks/get-raw-messages :tool-calling-with-thought-signature-0)
266+
;; Find the assistant message with tool_calls
267+
assistant-tool-call-msg (first (filter #(and (= "assistant" (:role %))
268+
(seq (:tool_calls %)))
269+
raw-messages))]
263270
(is (match?
264-
{:messages [{:role "user" :content [{:type "text" :text "What files you see?"}]}
265-
{:role "assistant"
266-
:content [{:type "thinking"
267-
:signature "enc-123"
268-
:thinking "I should call tool eca__directory_tree"}]}
269-
{:role "assistant" :content [{:type "text" :text "I will list files"}]}
270-
{:role "assistant"
271-
:content [{:type "tool_use"
272-
:id "tool-1"
273-
:name "eca__directory_tree"
274-
:input {:path (h/project-path->canon-path "resources")}}]}
275-
{:role "user"
276-
:content [{:type "tool_result"
277-
:tool_use_id "tool-1"
278-
:content (str "[FILE] " (h/project-path->canon-path "resources/file1.md\n")
279-
"[FILE] " (h/project-path->canon-path "resources/file2.md\n\n"))}]}]
280-
:tools (m/embeds
281-
[{:name "eca__directory_tree"}])
282-
:system (m/pred vector?)}
283-
llm.mocks/*last-req-body*))))))
271+
{:role "assistant"
272+
:tool_calls [{:extra_content {:google {:thought_signature "thought-sig-abc123"}}}]}
273+
assistant-tool-call-msg)))))))

integration-test/llm_mock/mocks.clj

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,22 @@
66
(alter-var-root #'*case* (constantly case)))
77

88
(defonce ^:private req-bodies* (atom {}))
9+
(defonce ^:private raw-messages* (atom {}))
910

1011
(defn set-req-body! [mock-case-id body]
1112
(swap! req-bodies* assoc mock-case-id body))
1213

1314
(defn get-req-body [mock-case-id]
1415
(get @req-bodies* mock-case-id))
1516

17+
(defn set-raw-messages! [mock-case-id messages]
18+
(swap! raw-messages* assoc mock-case-id messages))
19+
20+
(defn get-raw-messages [mock-case-id]
21+
(get @raw-messages* mock-case-id))
22+
1623
(defn clean-req-bodies! []
17-
(reset! req-bodies* {}))
24+
(reset! req-bodies* {})
25+
(reset! raw-messages* {}))
1826

1927
(def chat-title-generator-str "Title generator")

integration-test/llm_mock/openai_chat.clj

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
(:require
33
[cheshire.core :as json]
44
[clojure.string :as string]
5+
[integration.helper :as h]
56
[llm-mock.mocks :as llm.mocks]
67
[org.httpkit.server :as hk]))
78

@@ -95,6 +96,37 @@
9596
{:choices [{:message {:content "Some Cool Title"}}]})
9697
true))
9798

99+
(defn ^:private tool-calling-with-thought-signature-0 [ch path]
100+
;; Send reasoning content first (thinking)
101+
(send-sse! ch {:choices [{:delta {:content (str "<" *thinking-tag* ">")}}]})
102+
(send-sse! ch {:choices [{:delta {:content "I s"}}]})
103+
(send-sse! ch {:choices [{:delta {:content "hould call tool"}}]})
104+
(send-sse! ch {:choices [{:delta {:content " eca__directory_tree"}}]})
105+
(send-sse! ch {:choices [{:delta {:content (str "</" *thinking-tag* ">")}}]})
106+
;; Send text before tool call
107+
(send-sse! ch {:choices [{:delta {:content "I will list files"}}]})
108+
;; Send tool call with thought signature
109+
(send-sse! ch {:choices [{:delta {:tool_calls [{:index 0
110+
:id "tool-1"
111+
:function {:name "eca__directory_tree"
112+
:arguments ""}
113+
:extra_content {:google {:thought_signature "thought-sig-abc123"}}}]}}]})
114+
(send-sse! ch {:choices [{:delta {:tool_calls [{:index 0
115+
:function {:arguments "{\"pat"}}]}}]})
116+
(send-sse! ch {:choices [{:delta {:tool_calls [{:index 0
117+
:function {:arguments (str "h\":\"" path "\"}")}}]}}]})
118+
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
119+
(send-sse! ch {:choices [{:delta {} :finish_reason "tool_calls"}]})
120+
(hk/close ch))
121+
122+
(defn ^:private tool-calling-with-thought-signature-1 [ch]
123+
;; Second stage response after tool output
124+
(send-sse! ch {:choices [{:delta {:content "The files I see:\n"}}]})
125+
(send-sse! ch {:choices [{:delta {:content "file1\nfile2\n"}}]})
126+
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
127+
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
128+
(hk/close ch))
129+
98130
(defn handle-openai-chat [req]
99131
;; Capture and normalize the request body for assertions in tests
100132
(let [body (some-> (slurp (:body req)) (json/parse-string true))
@@ -114,14 +146,20 @@
114146
(chat-title-text-0 ch)
115147
(do
116148
(llm.mocks/set-req-body! llm.mocks/*case* normalized-body)
117-
(case llm.mocks/*case*
118-
:simple-text-0 (simple-text-0 ch)
119-
:simple-text-1 (simple-text-1 ch)
120-
:simple-text-2 (simple-text-2 ch)
121-
:reasoning-0 (reasoning-text-0 ch)
122-
:reasoning-1 (reasoning-text-1 ch)
123-
;; default fallback
124-
(do
125-
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
126-
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
127-
(hk/close ch))))))})))
149+
(llm.mocks/set-raw-messages! llm.mocks/*case* messages)
150+
(let [has-tool-message? (some #(= "tool" (:role %)) messages)]
151+
(case llm.mocks/*case*
152+
:simple-text-0 (simple-text-0 ch)
153+
:simple-text-1 (simple-text-1 ch)
154+
:simple-text-2 (simple-text-2 ch)
155+
:reasoning-0 (reasoning-text-0 ch)
156+
:reasoning-1 (reasoning-text-1 ch)
157+
:tool-calling-with-thought-signature-0
158+
(if has-tool-message?
159+
(tool-calling-with-thought-signature-1 ch)
160+
(tool-calling-with-thought-signature-0 ch (h/project-path->canon-path "resources")))
161+
;; default fallback
162+
(do
163+
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
164+
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
165+
(hk/close ch)))))))})))

0 commit comments

Comments
 (0)