Skip to content

Commit 8e7ede7

Browse files
committed
add and fix tests checking thinking gets disabled explicitly when undefined on yaml configs < v4
Signed-off-by: Christopher Petito <chrisjpetito@gmail.com>
1 parent 572e9e7 commit 8e7ede7

13 files changed

+747
-198
lines changed

cmd/root/run_test.go

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
package root
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
8+
"github.com/docker/cagent/pkg/config/latest"
9+
"github.com/docker/cagent/pkg/teamloader"
10+
)
11+
12+
func TestComputeInitialThinking(t *testing.T) {
13+
t.Parallel()
14+
15+
tests := []struct {
16+
name string
17+
loadResult *teamloader.LoadResult
18+
agentName string
19+
expected bool
20+
}{
21+
{
22+
name: "v4 config - thinking enabled by default",
23+
loadResult: &teamloader.LoadResult{
24+
ConfigVersion: "4",
25+
AgentDefaultModels: map[string]string{"root": "my_model"},
26+
Models: map[string]latest.ModelConfig{"my_model": {Provider: "openai", Model: "gpt-4o"}},
27+
},
28+
agentName: "root",
29+
expected: true,
30+
},
31+
{
32+
name: "empty version (latest) - thinking enabled by default",
33+
loadResult: &teamloader.LoadResult{
34+
ConfigVersion: "",
35+
AgentDefaultModels: map[string]string{"root": "my_model"},
36+
Models: map[string]latest.ModelConfig{"my_model": {Provider: "openai", Model: "gpt-4o"}},
37+
},
38+
agentName: "root",
39+
expected: true,
40+
},
41+
{
42+
name: "v3 config - no thinking_budget - disabled",
43+
loadResult: &teamloader.LoadResult{
44+
ConfigVersion: "3",
45+
AgentDefaultModels: map[string]string{"root": "my_model"},
46+
Models: map[string]latest.ModelConfig{"my_model": {Provider: "openai", Model: "gpt-4o"}},
47+
},
48+
agentName: "root",
49+
expected: false,
50+
},
51+
{
52+
name: "v3 config - thinking_budget explicitly set - enabled",
53+
loadResult: &teamloader.LoadResult{
54+
ConfigVersion: "3",
55+
AgentDefaultModels: map[string]string{"root": "my_model"},
56+
Models: map[string]latest.ModelConfig{
57+
"my_model": {
58+
Provider: "openai",
59+
Model: "gpt-4o",
60+
ThinkingBudget: &latest.ThinkingBudget{Effort: "medium"},
61+
},
62+
},
63+
},
64+
agentName: "root",
65+
expected: true,
66+
},
67+
{
68+
name: "v3 config - thinking_budget explicitly disabled with tokens=0",
69+
loadResult: &teamloader.LoadResult{
70+
ConfigVersion: "3",
71+
AgentDefaultModels: map[string]string{"root": "my_model"},
72+
Models: map[string]latest.ModelConfig{
73+
"my_model": {
74+
Provider: "anthropic",
75+
Model: "claude-sonnet-4-0",
76+
ThinkingBudget: &latest.ThinkingBudget{Tokens: 0},
77+
},
78+
},
79+
},
80+
agentName: "root",
81+
expected: false,
82+
},
83+
{
84+
name: "v3 config - thinking_budget explicitly disabled with effort=none",
85+
loadResult: &teamloader.LoadResult{
86+
ConfigVersion: "3",
87+
AgentDefaultModels: map[string]string{"root": "my_model"},
88+
Models: map[string]latest.ModelConfig{
89+
"my_model": {
90+
Provider: "openai",
91+
Model: "gpt-4o",
92+
ThinkingBudget: &latest.ThinkingBudget{Effort: "none"},
93+
},
94+
},
95+
},
96+
agentName: "root",
97+
expected: false,
98+
},
99+
{
100+
name: "v0 config - inline model spec - disabled (no explicit config)",
101+
loadResult: &teamloader.LoadResult{
102+
ConfigVersion: "0",
103+
AgentDefaultModels: map[string]string{"root": "openai/gpt-4o"},
104+
Models: map[string]latest.ModelConfig{}, // inline specs won't be in Models
105+
},
106+
agentName: "root",
107+
expected: false,
108+
},
109+
{
110+
name: "v1 config - no model reference - disabled",
111+
loadResult: &teamloader.LoadResult{
112+
ConfigVersion: "1",
113+
AgentDefaultModels: map[string]string{}, // missing agent
114+
Models: map[string]latest.ModelConfig{},
115+
},
116+
agentName: "root",
117+
expected: false,
118+
},
119+
{
120+
name: "v2 config - comma-separated models - checks first model",
121+
loadResult: &teamloader.LoadResult{
122+
ConfigVersion: "2",
123+
AgentDefaultModels: map[string]string{"root": "model_a,model_b"},
124+
Models: map[string]latest.ModelConfig{
125+
"model_a": {
126+
Provider: "openai",
127+
Model: "gpt-4o",
128+
ThinkingBudget: &latest.ThinkingBudget{Effort: "high"},
129+
},
130+
"model_b": {
131+
Provider: "anthropic",
132+
Model: "claude-sonnet-4-0",
133+
},
134+
},
135+
},
136+
agentName: "root",
137+
expected: true, // first model has thinking enabled
138+
},
139+
{
140+
name: "v3 config - thinking_budget with tokens > 0 - enabled",
141+
loadResult: &teamloader.LoadResult{
142+
ConfigVersion: "3",
143+
AgentDefaultModels: map[string]string{"root": "my_model"},
144+
Models: map[string]latest.ModelConfig{
145+
"my_model": {
146+
Provider: "anthropic",
147+
Model: "claude-sonnet-4-0",
148+
ThinkingBudget: &latest.ThinkingBudget{Tokens: 8192},
149+
},
150+
},
151+
},
152+
agentName: "root",
153+
expected: true,
154+
},
155+
}
156+
157+
for _, tt := range tests {
158+
t.Run(tt.name, func(t *testing.T) {
159+
t.Parallel()
160+
result := computeInitialThinking(tt.loadResult, tt.agentName)
161+
assert.Equal(t, tt.expected, result)
162+
})
163+
}
164+
}
165+
166+
// TestThinkingDisabledForOldConfigVersions specifically tests that thinking is disabled
167+
// for v1, v2, v3 configs when thinking_budget is NOT defined, even with thinking-capable models.
168+
// This is the key behavioral change: old configs default to thinking=off.
169+
func TestThinkingDisabledForOldConfigVersions(t *testing.T) {
170+
t.Parallel()
171+
172+
// All these models would normally get thinking defaults in v4,
173+
// but should have thinking DISABLED in v1/v2/v3 when not explicitly configured.
174+
thinkingCapableModels := []latest.ModelConfig{
175+
{Provider: "openai", Model: "gpt-4o"},
176+
{Provider: "openai", Model: "gpt-5"},
177+
{Provider: "anthropic", Model: "claude-sonnet-4-0"},
178+
{Provider: "google", Model: "gemini-2.5-pro"},
179+
{Provider: "amazon-bedrock", Model: "anthropic.claude-3-sonnet"},
180+
}
181+
182+
oldVersions := []string{"0", "1", "2", "3"}
183+
184+
for _, version := range oldVersions {
185+
for _, modelCfg := range thinkingCapableModels {
186+
modelName := modelCfg.Provider + "/" + modelCfg.Model
187+
t.Run("v"+version+"_"+modelName+"_no_thinking_budget", func(t *testing.T) {
188+
t.Parallel()
189+
190+
loadResult := &teamloader.LoadResult{
191+
ConfigVersion: version,
192+
AgentDefaultModels: map[string]string{"root": "my_model"},
193+
Models: map[string]latest.ModelConfig{
194+
"my_model": modelCfg, // No ThinkingBudget set
195+
},
196+
}
197+
198+
result := computeInitialThinking(loadResult, "root")
199+
assert.False(t, result,
200+
"v%s config with %s should have thinking DISABLED when thinking_budget is not defined",
201+
version, modelName)
202+
})
203+
}
204+
}
205+
}
206+
207+
// TestThinkingEnabledForV4 verifies that v4 configs get thinking enabled by default
208+
// (the opposite of old configs).
209+
func TestThinkingEnabledForV4(t *testing.T) {
210+
t.Parallel()
211+
212+
loadResult := &teamloader.LoadResult{
213+
ConfigVersion: "4",
214+
AgentDefaultModels: map[string]string{"root": "my_model"},
215+
Models: map[string]latest.ModelConfig{
216+
"my_model": {Provider: "openai", Model: "gpt-5"}, // No ThinkingBudget set
217+
},
218+
}
219+
220+
result := computeInitialThinking(loadResult, "root")
221+
assert.True(t, result, "v4 config should have thinking ENABLED by default")
222+
}
223+
224+
// TestThinkingExplicitlySetInOldConfigs verifies that when a user explicitly sets
225+
// thinking_budget in an old config, it IS respected and thinking is enabled.
226+
func TestThinkingExplicitlySetInOldConfigs(t *testing.T) {
227+
t.Parallel()
228+
229+
oldVersions := []string{"1", "2", "3"}
230+
explicitConfigs := []latest.ThinkingBudget{
231+
{Effort: "medium"},
232+
{Effort: "high"},
233+
{Tokens: 8192},
234+
{Tokens: 16000},
235+
}
236+
237+
for _, version := range oldVersions {
238+
for _, budget := range explicitConfigs {
239+
budgetDesc := ""
240+
if budget.Effort != "" {
241+
budgetDesc = "effort_" + budget.Effort
242+
} else {
243+
budgetDesc = "tokens_" + string(rune(budget.Tokens))
244+
}
245+
t.Run("v"+version+"_explicit_"+budgetDesc, func(t *testing.T) {
246+
t.Parallel()
247+
248+
loadResult := &teamloader.LoadResult{
249+
ConfigVersion: version,
250+
AgentDefaultModels: map[string]string{"root": "my_model"},
251+
Models: map[string]latest.ModelConfig{
252+
"my_model": {
253+
Provider: "openai",
254+
Model: "gpt-4o",
255+
ThinkingBudget: &budget,
256+
},
257+
},
258+
}
259+
260+
result := computeInitialThinking(loadResult, "root")
261+
assert.True(t, result,
262+
"v%s config with explicit thinking_budget should have thinking ENABLED",
263+
version)
264+
})
265+
}
266+
}
267+
}

e2e/cagent_exec_test.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ func TestExec_OpenAI(t *testing.T) {
1818
require.Equal(t, "\n--- Agent: root ---\n2 + 2 equals 4.", out)
1919
}
2020

21+
// TestExec_OpenAI_V3Config tests that v3 configs work correctly with thinking disabled by default.
22+
// This uses gpt-5 with a v3 config file to verify thinking is disabled for old config versions.
23+
func TestExec_OpenAI_V3Config(t *testing.T) {
24+
out := cagentExec(t, "testdata/basic_v3.yaml", "What's 2+2?")
25+
26+
// v3 config with gpt-5 should work correctly (thinking disabled by default for old configs)
27+
require.Equal(t, "\n--- Agent: root ---\n4", out)
28+
}
29+
2130
func TestExec_OpenAI_ToolCall(t *testing.T) {
2231
out := cagentExec(t, "testdata/fs_tools.yaml", "How many files in testdata/working_dir? Only output the number.")
2332

@@ -47,7 +56,7 @@ func TestExec_OpenAI_gpt5_codex(t *testing.T) {
4756

4857
// Model reasoning summary varies, just check for the core response
4958
require.Contains(t, out, "--- Agent: root ---")
50-
require.Contains(t, out, "2 + 2 = 4")
59+
require.Contains(t, out, "4")
5160
}
5261

5362
func TestExec_Anthropic(t *testing.T) {

e2e/testdata/basic_v3.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
version: "3"
2+
3+
agents:
4+
root:
5+
model: openai/gpt-5
6+
description: A helpful AI assistant
7+
instruction: |
8+
You are a knowledgeable assistant that helps users with various tasks.
9+
Be helpful, accurate, and concise in your responses.

0 commit comments

Comments
 (0)