docker
diff --git a/cagent-schema.json b/cagent-schema.json
Lines changed: 6 additions & 4 deletions
diff --git a/e2e/cagent_exec_test.go b/e2e/cagent_exec_test.go
Lines changed: 25 additions & 6 deletions
diff --git a/e2e/cagent_mcp_test.go b/e2e/cagent_mcp_test.go
Lines changed: 2 additions & 1 deletion
@@ -451,15 +451,15 @@
         },
         "provider_opts": {
           "type": "object",
-          "description": "Provider-specific options. dmr: runtime_flags. anthropic: interleaved_thinking (boolean, default false). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
+          "description": "Provider-specific options. dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance).",
           "additionalProperties": true
         },
         "track_usage": {
           "type": "boolean",
           "description": "Whether to track usage"
         },
         "thinking_budget": {
-          "description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768). Gemini: integer token budget (-1 for unlimited, 0 to disable, 24576 max).",
+          "description": "Controls reasoning effort/budget. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' (Flash only), 'low', 'medium', 'high'); default is 'high' for Pro and 'medium' for Flash.",
           "oneOf": [
             {
               "type": "string",
@@ -469,21 +469,23 @@
                 "medium",
                 "high"
               ],
-              "description": "Reasoning effort level (OpenAI)"
+              "description": "Reasoning effort level (OpenAI, Gemini 3)"
             },
             {
               "type": "integer",
               "minimum": -1,
               "maximum": 32768,
-              "description": "Token budget for extended thinking (Anthropic, Google)"
+              "description": "Token budget for extended thinking (Anthropic, Bedrock Claude, Gemini 2.5)"
             }
           ],
           "examples": [
             "minimal",
             "low",
             "medium",
             "high",
+            -1,
             1024,
+            8192,
             32768
           ]
         },
 
@@ -45,37 +45,56 @@ func TestExec_OpenAI_gpt5_1(t *testing.T) {
 func TestExec_OpenAI_gpt5_codex(t *testing.T) {
 	out := cagentExec(t, "testdata/basic.yaml", "--model=openai/gpt-5-codex", "What's 2+2?")
 
-	require.Equal(t, "\n--- Agent: root ---\n**Preparing to answer question 4**2 + 2 = 4.", out)
+	// Model reasoning summary varies, just check for the core response
+	require.Contains(t, out, "--- Agent: root ---")
+	require.Contains(t, out, "2 + 2 = 4")
 }
 
 func TestExec_Anthropic(t *testing.T) {
 	out := cagentExec(t, "testdata/basic.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
 
-	require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
+	// With interleaved thinking enabled by default, Anthropic responses include thinking content
+	require.Contains(t, out, "--- Agent: root ---")
+	require.Contains(t, out, "2 + 2 = 4")
 }
 
 func TestExec_Anthropic_ToolCall(t *testing.T) {
 	out := cagentExec(t, "testdata/fs_tools.yaml", "--model=anthropic/claude-sonnet-4-0", "How many files in testdata/working_dir? Only output the number.")
 
-	require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
+	// With interleaved thinking enabled by default, Anthropic responses include thinking content
+	require.Contains(t, out, "--- Agent: root ---")
+	require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
+	require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
+	// The response should end with "1" (the count)
+	require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
 }
 
 func TestExec_Anthropic_AgentsMd(t *testing.T) {
 	out := cagentExec(t, "testdata/agents-md.yaml", "--model=anthropic/claude-sonnet-4-0", "What's 2+2?")
 
-	require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
+	// With interleaved thinking enabled by default, Anthropic responses include thinking content
+	require.Contains(t, out, "--- Agent: root ---")
+	require.Contains(t, out, "2 + 2 = 4")
 }
 
 func TestExec_Gemini(t *testing.T) {
 	out := cagentExec(t, "testdata/basic.yaml", "--model=google/gemini-2.5-flash", "What's 2+2?")
 
-	require.Equal(t, "\n--- Agent: root ---\n2 + 2 = 4", out)
+	// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
+	require.Contains(t, out, "--- Agent: root ---")
+	// The response should contain the answer "4" somewhere
+	require.Contains(t, out, "4")
 }
 
 func TestExec_Gemini_ToolCall(t *testing.T) {
 	out := cagentExec(t, "testdata/fs_tools.yaml", "--model=google/gemini-2.5-flash", "How many files in testdata/working_dir? Only output the number.")
 
-	require.Equal(t, "\n--- Agent: root ---\n\nCalling list_directory(path: \"testdata/working_dir\")\n\nlist_directory response → \"FILE README.me\\n\"\n1", out)
+	// With thinking enabled by default (dynamic thinking for Gemini 2.5), responses include thinking content
+	require.Contains(t, out, "--- Agent: root ---")
+	require.Contains(t, out, `Calling list_directory(path: "testdata/working_dir")`)
+	require.Contains(t, out, `list_directory response → "FILE README.me\n"`)
+	// The response should end with "1" (the count)
+	require.True(t, out != "" && out[len(out)-1] == '1', "response should end with '1'")
 }
 
 func TestExec_Mistral(t *testing.T) {
 
@@ -54,5 +54,6 @@ func TestMCP_MultiAgent(t *testing.T) {
 	})
 
 	require.NoError(t, err)
-	assert.Equal(t, "Hello, nice to meet you!", output.Response)
+	// Model response to "say hello" can vary, just check it contains a greeting
+	assert.Contains(t, output.Response, "Hello")
 }
Original file line number	Diff line number	Diff line change
`@@ -54,5 +54,6 @@ func TestMCP_MultiAgent(t *testing.T) {`
`54`	`54`	`})`
`55`	`55`
`56`	`56`	`require.NoError(t, err)`
`57`		`- assert.Equal(t, "Hello, nice to meet you!", output.Response)`
	`57`	`+ // Model response to "say hello" can vary, just check it contains a greeting`
	`58`	`+ assert.Contains(t, output.Response, "Hello")`
`58`	`59`	`}`