@@ -418,6 +418,60 @@ def _fallback_context_window(model_id: str, context_window: int | None) -> int |
418418 return None
419419
420420
421+ def _fallback_max_output_tokens (
422+ model_id : str ,
423+ max_output_tokens : int | None ,
424+ max_tokens : int | None ,
425+ context_window : int | None ,
426+ model_type : str | None ,
427+ ) -> int | None :
428+ """Infer max_output_tokens when upstream metadata omits it."""
429+ if max_output_tokens is not None :
430+ return max_output_tokens
431+
432+ # Embedding-style models do not have completion output token budgets.
433+ lowered_type = (model_type or "" ).lower ()
434+ lowered_id = model_id .lower ()
435+ if "embedding" in lowered_type or "embed" in lowered_id :
436+ return None
437+
438+ # When a provider exposes max_tokens, use that as the best available bound.
439+ if max_tokens is not None :
440+ return max_tokens
441+
442+ # Family/model heuristics for common chat/completion models in this deployment.
443+ if "gpt-oss-120b" in lowered_id or "gpt-oss:120b" in lowered_id :
444+ return 131072
445+ if "gpt-oss-20b" in lowered_id or "gpt-oss:20b" in lowered_id :
446+ return 65536
447+ if "deepseek-v3" in lowered_id :
448+ return 65536
449+ if "qwen3-coder" in lowered_id :
450+ return 65536
451+ if "qwen3" in lowered_id or "qwen-3" in lowered_id :
452+ return 32768
453+ if "kimi-k2.5" in lowered_id :
454+ return 65536
455+ if "kimi-k2" in lowered_id :
456+ return 32768
457+ if "minimax-m2.5" in lowered_id :
458+ return 65536
459+ if "glm-5" in lowered_id :
460+ return 65536
461+ if "glm-4.7" in lowered_id or "glm-4.6" in lowered_id :
462+ return 32768
463+ if "llama-3.3" in lowered_id :
464+ return 16384
465+ if "llama-3.1" in lowered_id or "llama-3.2" in lowered_id :
466+ return 8192
467+
468+ # Last resort: derive from context window, but keep sane limits.
469+ if context_window is not None :
470+ return max (2048 , min (context_window // 2 , 65536 ))
471+
472+ return None
473+
474+
421475def _extract_tags (raw : dict ) -> list [str ]:
422476 """Extract and normalize tags from common payload sections."""
423477
@@ -595,6 +649,13 @@ def from_raw(cls, model_id: str, raw: dict, database_id: str | None = None) -> "
595649 model_type = _extract_model_type (model_id , raw , capabilities )
596650 capabilities = _ensure_capabilities (model_id , capabilities , model_type )
597651 context_window = _fallback_context_window (model_id , context_window )
652+ max_output_tokens = _fallback_max_output_tokens (
653+ model_id = model_id ,
654+ max_output_tokens = max_output_tokens ,
655+ max_tokens = max_tokens ,
656+ context_window = context_window ,
657+ model_type = model_type ,
658+ )
598659 tags = _extract_tags (raw )
599660
600661 return cls (
0 commit comments