Skip to content

Commit ffb1882

Browse files
committed
Adjust docs and tests for better model defaults
Signed-off-by: Christopher Petito <chrisjpetito@gmail.com>
1 parent 37fdad0 commit ffb1882

15 files changed

+1407
-176
lines changed

cagent-schema.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,15 +451,15 @@
451451
},
452452
"provider_opts": {
453453
"type": "object",
454-
"description": "Provider-specific options. dmr: runtime_flags. anthropic: interleaved_thinking (boolean, default false). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
454+
"description": "Provider-specific options. dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
455455
"additionalProperties": true
456456
},
457457
"track_usage": {
458458
"type": "boolean",
459459
"description": "Whether to track usage"
460460
},
461461
"thinking_budget": {
462-
"description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768). Gemini: integer token budget (-1 for unlimited, 0 to disable, 24576 max).",
462+
"description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' (Flash only),'low','medium','high'), default 'high' for Pro, 'medium' for Flash.",
463463
"oneOf": [
464464
{
465465
"type": "string",
@@ -469,21 +469,23 @@
469469
"medium",
470470
"high"
471471
],
472-
"description": "Reasoning effort level (OpenAI)"
472+
"description": "Reasoning effort level (OpenAI, Gemini 3)"
473473
},
474474
{
475475
"type": "integer",
476476
"minimum": -1,
477477
"maximum": 32768,
478-
"description": "Token budget for extended thinking (Anthropic, Google)"
478+
"description": "Token budget for extended thinking (Anthropic, Bedrock Claude, Gemini 2.5)"
479479
}
480480
],
481481
"examples": [
482482
"minimal",
483483
"low",
484484
"medium",
485485
"high",
486+
-1,
486487
1024,
488+
8192,
487489
32768
488490
]
489491
},

e2e/cagent_exec_test.go

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,31 +51,48 @@ func TestExec_OpenAI_gpt5_codex(t *testing.T) {
5151
func TestExec_Anthropic(t *testing.T) {
5252
out := cagentExec(t, "testdata/basic.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
5353

54-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
54+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
55+
require.Contains(t, out, "--- Agent: root ---")
56+
require.Contains(t, out, "2 + 2 = 4")
5557
}
5658

5759
func TestExec_Anthropic_ToolCall(t *testing.T) {
5860
out := cagentExec(t, "testdata/fs_tools.yaml", "--model=anthropic/claude-sonnet-4-0", "How many files in testdata/working_dir? Only output the number.")
5961

60-
require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
62+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
63+
require.Contains(t, out, "--- Agent: root ---")
64+
require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
65+
require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
66+
// The response should end with "1" (the count)
67+
require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
6168
}
6269

6370
func TestExec_Anthropic_AgentsMd(t *testing.T) {
6471
out := cagentExec(t, "testdata/agents-md.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
6572

66-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
73+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
74+
require.Contains(t, out, "--- Agent: root ---")
75+
require.Contains(t, out, "2 + 2 = 4")
6776
}
6877

6978
func TestExec_Gemini(t *testing.T) {
7079
out := cagentExec(t, "testdata/basic.yaml", "--model=google/gemini-2.5-flash", "What's 2+2?")
7180

72-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
81+
// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
82+
require.Contains(t, out, "--- Agent: root ---")
83+
// The response should contain the answer "4" somewhere
84+
require.Contains(t, out, "4")
7385
}
7486

7587
func TestExec_Gemini_ToolCall(t *testing.T) {
7688
out := cagentExec(t, "testdata/fs_tools.yaml", "--model=google/gemini-2.5-flash", "How many files in testdata/working_dir? Only output the number.")
7789

78-
require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
90+
// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
91+
require.Contains(t, out, "--- Agent: root ---")
92+
require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
93+
require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
94+
// The response should end with "1" (the count)
95+
require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
7996
}
8097

8198
func TestExec_Mistral(t *testing.T) {

e2e/cagent_mcp_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,6 @@ func TestMCP_MultiAgent(t *testing.T) {
5454
})
5555

5656
require.NoError(t, err)
57-
assert.Equal(t, "Hello, how can I help you today?", output.Response)
57+
// Model response to "say hello" can vary, just check it contains a greeting
58+
assert.Contains(t, output.Response, "Hello")
5859
}

e2e/testdata/cassettes/TestA2AServer_MultiAgent.yaml

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ interactions:
88
proto_minor: 1
99
content_length: 0
1010
host: api.openai.com
11-
body: '{"messages":[{"content":"You are a multi-agent system, make sure to answer the user query in the most helpful way possible. You have access to these sub-agents:\nName: web | Description: \n\nIMPORTANT: You can ONLY transfer tasks to the agents listed above using their ID. The valid agent names are: web. You MUST NOT attempt to transfer to any other agent IDs - doing so will cause system errors.\n\nIf you are the best to answer the question according to your description, you can answer it.\n\nIf another agent is better for answering the question according to its description, call `transfer_task` function to transfer the question to that agent using the agent''s ID. When transferring, do not generate any text other than the function call.\n\n","role":"system"},{"content":"You are a knowledgeable assistant that helps users with various tasks.\nBe helpful, accurate, and concise in your responses.\n","role":"system"},{"content":"Say hello.","role":"user"}],"model":"gpt-5-mini","stream_options":{"include_usage":true},"tools":[{"function":{"name":"transfer_task","description":"Use this function to transfer a task to the selected team member.\n You must provide a clear and concise description of the task the member should achieve AND the expected output.","parameters":{"additionalProperties":false,"properties":{"agent":{"description":"The name of the agent to transfer the task to.","type":"string"},"expected_output":{"description":"The expected output from the member (optional).","type":"string"},"task":{"description":"A clear and concise description of the task the member should achieve.","type":"string"}},"required":["agent","expected_output","task"],"type":"object"}},"type":"function"}],"stream":true}'
11+
body: '{"messages":[{"content":"You are a multi-agent system, make sure to answer the user query in the most helpful way possible. You have access to these sub-agents:\nName: web | Description: \n\nIMPORTANT: You can ONLY transfer tasks to the agents listed above using their ID. The valid agent names are: web. You MUST NOT attempt to transfer to any other agent IDs - doing so will cause system errors.\n\nIf you are the best to answer the question according to your description, you can answer it.\n\nIf another agent is better for answering the question according to its description, call `transfer_task` function to transfer the question to that agent using the agent''s ID. When transferring, do not generate any text other than the function call.\n\n","role":"system"},{"content":"You are a knowledgeable assistant that helps users with various tasks.\nBe helpful, accurate, and concise in your responses.\n","role":"system"},{"content":"Say hello.","role":"user"}],"model":"gpt-5-mini","reasoning_effort":"medium","stream_options":{"include_usage":true},"tools":[{"function":{"name":"transfer_task","description":"Use this function to transfer a task to the selected team member.\n You must provide a clear and concise description of the task the member should achieve AND the expected output.","parameters":{"additionalProperties":false,"properties":{"agent":{"description":"The name of the agent to transfer the task to.","type":"string"},"expected_output":{"description":"The expected output from the member (optional).","type":"string"},"task":{"description":"A clear and concise description of the task the member should achieve.","type":"string"}},"required":["agent","expected_output","task"],"type":"object"}},"type":"function"}],"stream":true}'
1212
url: https://api.openai.com/v1/chat/completions
1313
method: POST
1414
response:
@@ -17,33 +17,19 @@ interactions:
1717
proto_minor: 0
1818
content_length: -1
1919
body: |+
20-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null,"obfuscation":"c186g"}
20+
data: {"id":"chatcmpl-CykKataoyjM1Yk8bJpDvOBdhXLMtu","object":"chat.completion.chunk","created":1768593812,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null,"obfuscation":"zXPgf"}
2121
22-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null,"obfuscation":"57"}
22+
data: {"id":"chatcmpl-CykKataoyjM1Yk8bJpDvOBdhXLMtu","object":"chat.completion.chunk","created":1768593812,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}],"usage":null,"obfuscation":"IJ"}
2323
24-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}],"usage":null,"obfuscation":"XDOJHR"}
24+
data: {"id":"chatcmpl-CykKataoyjM1Yk8bJpDvOBdhXLMtu","object":"chat.completion.chunk","created":1768593812,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}],"usage":null,"obfuscation":"C0Oy9w"}
2525
26-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" How"},"finish_reason":null}],"usage":null,"obfuscation":"i1G"}
26+
data: {"id":"chatcmpl-CykKataoyjM1Yk8bJpDvOBdhXLMtu","object":"chat.completion.chunk","created":1768593812,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null,"obfuscation":"d"}
2727
28-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" can"},"finish_reason":null}],"usage":null,"obfuscation":"yFM"}
29-
30-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" I"},"finish_reason":null}],"usage":null,"obfuscation":"L8FUi"}
31-
32-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" help"},"finish_reason":null}],"usage":null,"obfuscation":"b0"}
33-
34-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" you"},"finish_reason":null}],"usage":null,"obfuscation":"UmQ"}
35-
36-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":" today"},"finish_reason":null}],"usage":null,"obfuscation":"7"}
37-
38-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"content":"?"},"finish_reason":null}],"usage":null,"obfuscation":"36Ullb"}
39-
40-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null,"obfuscation":"2"}
41-
42-
data: {"id":"chatcmpl-Cyg4WkSjCDUBxPviMs09eFjYBD3x1","object":"chat.completion.chunk","created":1768577440,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":373,"completion_tokens":18,"total_tokens":391,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"NkWx"}
28+
data: {"id":"chatcmpl-CykKataoyjM1Yk8bJpDvOBdhXLMtu","object":"chat.completion.chunk","created":1768593812,"model":"gpt-5-mini-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":373,"completion_tokens":75,"total_tokens":448,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":64,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"CRG"}
4329
4430
data: [DONE]
4531
4632
headers: {}
4733
status: 200 OK
4834
code: 200
49-
duration: 2.448987709s
35+
duration: 2.228273792s

0 commit comments

Comments
 (0)