Skip to content

Commit b04faf3

Browse files
committed
Adjust docs and tests for better model defaults
Signed-off-by: Christopher Petito <chrisjpetito@gmail.com>
1 parent 8e4bf74 commit b04faf3

16 files changed

+1876
-546
lines changed

cagent-schema.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -451,15 +451,15 @@
451451
},
452452
"provider_opts": {
453453
"type": "object",
454-
"description": "Provider-specific options. dmr: runtime_flags. anthropic: interleaved_thinking (boolean, default false). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
454+
"description": "Provider-specific options. dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
455455
"additionalProperties": true
456456
},
457457
"track_usage": {
458458
"type": "boolean",
459459
"description": "Whether to track usage"
460460
},
461461
"thinking_budget": {
462-
"description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768). Gemini: integer token budget (-1 for unlimited, 0 to disable, 24576 max).",
462+
"description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' (Flash only),'low','medium','high'), default 'high' for Pro and 'medium' for Flash.",
463463
"oneOf": [
464464
{
465465
"type": "string",
@@ -469,21 +469,23 @@
469469
"medium",
470470
"high"
471471
],
472-
"description": "Reasoning effort level (OpenAI)"
472+
"description": "Reasoning effort level (OpenAI, Gemini 3)"
473473
},
474474
{
475475
"type": "integer",
476476
"minimum": -1,
477477
"maximum": 32768,
478-
"description": "Token budget for extended thinking (Anthropic, Google)"
478+
"description": "Token budget for extended thinking (Anthropic, Bedrock Claude, Gemini 2.5)"
479479
}
480480
],
481481
"examples": [
482482
"minimal",
483483
"low",
484484
"medium",
485485
"high",
486+
-1,
486487
1024,
488+
8192,
487489
32768
488490
]
489491
},

e2e/cagent_exec_test.go

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,37 +45,56 @@ func TestExec_OpenAI_gpt5_1(t *testing.T) {
4545
func TestExec_OpenAI_gpt5_codex(t *testing.T) {
4646
out := cagentExec(t, "testdata/basic.yaml", "--model=openai/gpt-5-codex", "What's 2+2?")
4747

48-
require.Equal(t, "\n--- Agent: root ---\n**Preparing to answer question 4**2 + 2 = 4.", out)
48+
// Model reasoning summary varies, just check for the core response
49+
require.Contains(t, out, "--- Agent: root ---")
50+
require.Contains(t, out, "2 + 2 = 4")
4951
}
5052

5153
func TestExec_Anthropic(t *testing.T) {
5254
out := cagentExec(t, "testdata/basic.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
5355

54-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
56+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
57+
require.Contains(t, out, "--- Agent: root ---")
58+
require.Contains(t, out, "2 + 2 = 4")
5559
}
5660

5761
func TestExec_Anthropic_ToolCall(t *testing.T) {
5862
out := cagentExec(t, "testdata/fs_tools.yaml", "--model=anthropic/claude-sonnet-4-0", "How many files in testdata/working_dir? Only output the number.")
5963

60-
require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
64+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
65+
require.Contains(t, out, "--- Agent: root ---")
66+
require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
67+
require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
68+
// The response should end with "1" (the count)
69+
require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
6170
}
6271

6372
func TestExec_Anthropic_AgentsMd(t *testing.T) {
6473
out := cagentExec(t, "testdata/agents-md.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
6574

66-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
75+
// With interleaved thinking enabled by default, Anthropic responses include thinking content
76+
require.Contains(t, out, "--- Agent: root ---")
77+
require.Contains(t, out, "2 + 2 = 4")
6778
}
6879

6980
func TestExec_Gemini(t *testing.T) {
7081
out := cagentExec(t, "testdata/basic.yaml", "--model=google/gemini-2.5-flash", "What's 2+2?")
7182

72-
require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
83+
// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
84+
require.Contains(t, out, "--- Agent: root ---")
85+
// The response should contain the answer "4" somewhere
86+
require.Contains(t, out, "4")
7387
}
7488

7589
func TestExec_Gemini_ToolCall(t *testing.T) {
7690
out := cagentExec(t, "testdata/fs_tools.yaml", "--model=google/gemini-2.5-flash", "How many files in testdata/working_dir? Only output the number.")
7791

78-
require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
92+
// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
93+
require.Contains(t, out, "--- Agent: root ---")
94+
require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
95+
require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
96+
// The response should end with "1" (the count)
97+
require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
7998
}
8099

81100
func TestExec_Mistral(t *testing.T) {

e2e/cagent_mcp_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,6 @@ func TestMCP_MultiAgent(t *testing.T) {
5454
})
5555

5656
require.NoError(t, err)
57-
assert.Equal(t, "Hello, nice to meet you!", output.Response)
57+
// Model response to "say hello" can vary, just check it contains a greeting
58+
assert.Contains(t, output.Response, "Hello")
5859
}

0 commit comments

Comments
 (0)