@@ -1161,4 +1161,71 @@ def test_explicit_max_output_tokens_not_overridden():
11611161 assert llm .max_output_tokens == 32768
11621162
11631163
@patch("openhands.sdk.llm.llm.get_litellm_model_info")
def test_max_output_tokens_capped_when_equal_to_context_window(
    mock_get_model_info,
):
    """Halve max_output_tokens when it equals the full context window.

    If max_output_tokens consumed the whole window there would be no room
    for input tokens, and strict providers (e.g. AWS Bedrock) reject every
    request. The LLM constructor should cap the output budget at half.
    """
    context_window = 262144
    # Registry reports output limit equal to the input limit (= the window).
    mock_get_model_info.return_value = {
        "max_output_tokens": context_window,
        "max_input_tokens": context_window,
    }

    llm = LLM(
        model="litellm_proxy/test-model-equal-windows",
        api_key=SecretStr("test-key"),
        usage_id="test-llm",
    )

    # Output budget is halved to leave input headroom; input limit untouched.
    assert llm.max_output_tokens == context_window // 2
    assert llm.max_input_tokens == context_window
1186+
1187+
@patch("openhands.sdk.llm.llm.get_litellm_model_info")
def test_max_output_tokens_capped_when_equal_to_max_tokens(
    mock_get_model_info,
):
    """Halve max_output_tokens when it equals max_tokens (window-only info).

    Some registries expose only max_tokens (the context window) and leave
    max_input_tokens unset. The zero-headroom guard must still fire and
    halve the output budget.
    """
    window = 131072
    # No max_input_tokens here — the guard must fall back to max_tokens.
    mock_get_model_info.return_value = {
        "max_output_tokens": window,
        "max_tokens": window,
        "max_input_tokens": None,
    }

    llm = LLM(
        model="litellm_proxy/test-model-max-tokens-only",
        api_key=SecretStr("test-key"),
        usage_id="test-llm",
    )

    assert llm.max_output_tokens == window // 2
1210+
1211+
@patch("openhands.sdk.llm.llm.get_litellm_model_info")
def test_max_output_tokens_not_capped_when_below_context_window(
    mock_get_model_info,
):
    """Leave max_output_tokens untouched when it is below the window."""
    output_limit = 8192
    # Output limit is well under the input window — no capping needed.
    mock_get_model_info.return_value = {
        "max_output_tokens": output_limit,
        "max_input_tokens": 200000,
    }

    llm = LLM(
        model="anthropic/claude-3-5-sonnet-latest",
        api_key=SecretStr("test-key"),
        usage_id="test-llm",
    )

    # Registry value is adopted as-is.
    assert llm.max_output_tokens == output_limit
1229+
1230+
# LLM Registry Tests