Skip to content

Commit 14c0b03

Browse files
Add max_tokens fallback inference for models missing provider metadata
1 parent b6d675f commit 14c0b03

File tree

6 files changed

+57
-5
lines changed

6 files changed

+57
-5
lines changed

backend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Backend sync worker service."""
22

3-
__version__ = "0.6.31"
3+
__version__ = "0.6.32"

frontend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Frontend API and UI service."""
22

3-
__version__ = "0.6.31"
3+
__version__ = "0.6.32"

proxy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.6.31"
1+
__version__ = "0.6.32"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "litellm-companion"
3-
version = "0.6.31"
3+
version = "0.6.32"
44
description = "Synchronize models from Ollama or OpenAI-compatible endpoints into LiteLLM"
55
authors = [
66
{name = "LiteLLM Companion Authors", email = "dev@example.com"}

shared/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Shared code between backend and frontend services."""
22

3-
__version__ = "0.6.31"
3+
__version__ = "0.6.32"

shared/models.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,52 @@ def _fallback_max_output_tokens(
472472
return None
473473

474474

475+
def _fallback_max_tokens(
476+
model_id: str,
477+
max_tokens: int | None,
478+
context_window: int | None,
479+
model_type: str | None,
480+
) -> int | None:
481+
"""Infer max_tokens when upstream metadata omits it."""
482+
if max_tokens is not None:
483+
return max_tokens
484+
485+
lowered_type = (model_type or "").lower()
486+
lowered_id = model_id.lower()
487+
if "embedding" in lowered_type or "embed" in lowered_id:
488+
return None
489+
490+
if "gpt-oss-120b" in lowered_id or "gpt-oss:120b" in lowered_id:
491+
return 131072
492+
if "gpt-oss-20b" in lowered_id or "gpt-oss:20b" in lowered_id:
493+
return 65536
494+
if "deepseek-v3" in lowered_id:
495+
return 65536
496+
if "qwen3-coder" in lowered_id:
497+
return 65536
498+
if "qwen3" in lowered_id or "qwen-3" in lowered_id:
499+
return 32768
500+
if "kimi-k2.5" in lowered_id:
501+
return 65536
502+
if "kimi-k2" in lowered_id:
503+
return 32768
504+
if "minimax-m2.5" in lowered_id:
505+
return 65536
506+
if "glm-5" in lowered_id:
507+
return 65536
508+
if "glm-4.7" in lowered_id or "glm-4.6" in lowered_id:
509+
return 32768
510+
if "llama-3.3" in lowered_id:
511+
return 16384
512+
if "llama-3.1" in lowered_id or "llama-3.2" in lowered_id:
513+
return 8192
514+
515+
if context_window is not None:
516+
return max(2048, min(context_window, 262144))
517+
518+
return None
519+
520+
475521
def _extract_tags(raw: dict) -> list[str]:
476522
"""Extract and normalize tags from common payload sections."""
477523

@@ -649,6 +695,12 @@ def from_raw(cls, model_id: str, raw: dict, database_id: str | None = None) -> "
649695
model_type = _extract_model_type(model_id, raw, capabilities)
650696
capabilities = _ensure_capabilities(model_id, capabilities, model_type)
651697
context_window = _fallback_context_window(model_id, context_window)
698+
max_tokens = _fallback_max_tokens(
699+
model_id=model_id,
700+
max_tokens=max_tokens,
701+
context_window=context_window,
702+
model_type=model_type,
703+
)
652704
max_output_tokens = _fallback_max_output_tokens(
653705
model_id=model_id,
654706
max_output_tokens=max_output_tokens,

0 commit comments

Comments
 (0)