Skip to content

Commit 429d683

Browse files
Improve routing-group HA sync and refresh free-tier presets
1 parent d824609 commit 429d683

File tree

11 files changed

+237
-50
lines changed

11 files changed

+237
-50
lines changed

backend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Backend sync worker service."""
22

3-
__version__ = "0.6.25"
3+
__version__ = "0.6.28"

backend/litellm_client.py

Lines changed: 150 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""LiteLLM API client for pushing models."""
22
import logging
3+
from collections import OrderedDict
34
import httpx
45

56
from shared.models import ModelMetadata
@@ -220,6 +221,8 @@ async def list_routing_group_deployments(config) -> list[dict]:
220221
"group": group_name,
221222
"provider": _extract_tag_value(tags, "provider:") or "",
222223
"model_id": _extract_tag_value(tags, "model:") or "",
224+
"routing_target": _extract_tag_value(tags, "routing_target:") or "",
225+
"routing_slot": _extract_tag_value(tags, "routing_slot:") or "",
223226
"model_name": model.get("model_name"),
224227
"model_info_id": model.get("model_info", {}).get("id"),
225228
"created_by": model.get("model_info", {}).get("created_by"),
@@ -635,6 +638,86 @@ def _merge_pricing_fields(target: dict, source: dict) -> None:
635638
target[key] = value
636639

637640

641+
async def _set_group_fallbacks(
    client: httpx.AsyncClient,
    base_url: str,
    api_key: str | None,
    group_name: str,
    fallback_models: list[str],
) -> None:
    """Register the "general" fallback list for a routing group model.

    Sends a POST to ``<base_url>/fallback`` naming ``group_name`` as the
    model and ``fallback_models`` as its fallbacks. Any error status in
    the reply is surfaced through ``raise_for_status()``.
    """
    endpoint = base_url.rstrip("/") + "/fallback"
    auth_headers = _make_auth_headers(api_key)
    body = {
        "model": group_name,
        "fallback_models": fallback_models,
        "fallback_type": "general",
    }
    reply = await client.post(
        endpoint,
        json=body,
        headers=auth_headers,
        timeout=DEFAULT_TIMEOUT,
    )
    reply.raise_for_status()
658+
659+
660+
async def _clear_group_fallbacks(
    client: httpx.AsyncClient,
    base_url: str,
    api_key: str | None,
    group_name: str,
) -> None:
    """Delete general fallback configuration for a routing group model.

    Sends ``DELETE <base_url>/fallback/<group_name>`` with
    ``fallback_type=general``. A 404 reply is treated as success, since
    there is nothing left to remove; any other error status is raised via
    ``raise_for_status()``.
    """
    from urllib.parse import quote

    # Group names are user-supplied; percent-encode the path segment so that
    # characters such as "/", "?", "#" or "%" cannot change which resource
    # the request addresses.
    encoded_name = quote(group_name, safe="")
    url = f"{base_url.rstrip('/')}/fallback/{encoded_name}"
    headers = _make_auth_headers(api_key)
    response = await client.delete(
        url,
        params={"fallback_type": "general"},
        headers=headers,
        timeout=DEFAULT_TIMEOUT,
    )
    if response.status_code == 404:
        return
    response.raise_for_status()
678+
679+
680+
async def delete_routing_group_from_litellm(config, group_name: str) -> dict:
    """Delete all LiteLLM deployments/fallbacks for one routing group.

    Fetches the current LiteLLM models, deletes every entry that carries the
    ``routing_group:<group_name>`` tag and was created by ``routing_group``,
    then clears the group's general fallback configuration.

    Returns a dict with counters ``deleted``, ``errors`` and
    ``fallback_deleted`` (set to 1 when the fallback-clear call completed
    without raising). Individual delete failures are logged and counted
    rather than raised.

    Raises:
        RuntimeError: if ``config.litellm_base_url`` is not set.
    """
    if not config.litellm_base_url:
        raise RuntimeError("LiteLLM destination not configured")

    group_tag = f"routing_group:{group_name}".lower()
    stats = {"deleted": 0, "errors": 0, "fallback_deleted": 0}

    async with httpx.AsyncClient() as client:
        litellm_models = await fetch_litellm_models(client, config.litellm_base_url, config.litellm_api_key)
        for model in litellm_models:
            tags = _collect_litellm_tags(model)
            if group_tag not in tags:
                continue
            # "model_info" can be present but null; fall back to an empty dict
            # so one malformed entry cannot abort the whole cleanup.
            model_info = model.get("model_info") or {}
            if model_info.get("created_by") != "routing_group":
                continue
            model_id = model_info.get("id")
            if not model_id:
                continue
            try:
                await delete_model_from_litellm(
                    client,
                    config.litellm_base_url,
                    config.litellm_api_key,
                    model_id,
                )
                stats["deleted"] += 1
            except Exception as exc:
                stats["errors"] += 1
                logger.warning("Failed deleting routing group entry %s: %s", model_id, exc)

        # Best effort: a failure here is counted but must not hide the
        # deployment deletions that already succeeded.
        try:
            await _clear_group_fallbacks(client, config.litellm_base_url, config.litellm_api_key, group_name)
            stats["fallback_deleted"] = 1
        except Exception as exc:
            stats["errors"] += 1
            logger.warning("Failed deleting fallback config for %s: %s", group_name, exc)

    return stats
719+
720+
638721
async def push_routing_groups_to_litellm(session, config, group_id: int | None = None) -> dict:
639722
"""Push routing groups to LiteLLM as model groups."""
640723
if not config.litellm_base_url:
@@ -650,14 +733,25 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
650733
groups = [group] if group else []
651734

652735
groups = [g for g in groups if g is not None]
653-
stats = {"groups": len(groups), "added": 0, "deleted": 0, "missing_models": 0, "errors": 0}
736+
stats = {
737+
"groups": len(groups),
738+
"added": 0,
739+
"deleted": 0,
740+
"missing_models": 0,
741+
"errors": 0,
742+
"fallbacks_updated": 0,
743+
"fallbacks_deleted": 0,
744+
"targets_skipped_disabled": 0,
745+
}
654746

655747
async with httpx.AsyncClient() as client:
656748
litellm_models = await fetch_litellm_models(client, config.litellm_base_url, config.litellm_api_key)
657749

658750
for group in groups:
659751
group_tag = f"routing_group:{group.name}"
660752
group_tag_lower = group_tag.lower()
753+
fallback_models: list[str] = []
754+
seen_fallback_models: OrderedDict[str, bool] = OrderedDict()
661755

662756
for m in litellm_models:
663757
tags = m.get("litellm_params", {}).get("tags", [])
@@ -684,6 +778,9 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
684778
logger.warning("Failed deleting routing group entry %s: %s", model_id, exc)
685779

686780
for target in sorted(group.targets, key=lambda t: (t.priority, t.id)):
781+
if not target.enabled:
782+
stats["targets_skipped_disabled"] += 1
783+
continue
687784
provider = target.provider or await get_provider_by_id(session, target.provider_id)
688785
if not provider:
689786
stats["missing_models"] += 1
@@ -692,29 +789,66 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
692789
if not model:
693790
stats["missing_models"] += 1
694791
continue
792+
793+
fallback_name = model.get_display_name(apply_prefix=True)
794+
if fallback_name and fallback_name not in seen_fallback_models:
795+
seen_fallback_models[fallback_name] = True
796+
fallback_models.append(fallback_name)
797+
798+
slot_count = max(1, int(target.weight or 1))
799+
routing_target_tag = f"routing_target:{provider.id}:{model.model_id}"
800+
for slot in range(1, slot_count + 1):
801+
extra_tags = [group_tag, routing_target_tag, f"routing_slot:{slot}"]
802+
try:
803+
await push_model_to_litellm(
804+
client,
805+
config.litellm_base_url,
806+
config.litellm_api_key,
807+
provider,
808+
model,
809+
config=config,
810+
session=session,
811+
model_name_override=group.name,
812+
extra_tags=extra_tags,
813+
created_by="routing_group",
814+
strip_unique_id=True,
815+
)
816+
stats["added"] += 1
817+
except Exception as exc:
818+
stats["errors"] += 1
819+
logger.warning(
820+
"Failed pushing routing target %s/%s (slot=%s) for group %s: %s",
821+
provider.name,
822+
model.model_id,
823+
slot,
824+
group.name,
825+
exc,
826+
)
827+
828+
if fallback_models:
695829
try:
696-
await push_model_to_litellm(
830+
await _set_group_fallbacks(
697831
client,
698832
config.litellm_base_url,
699833
config.litellm_api_key,
700-
provider,
701-
model,
702-
config=config,
703-
session=session,
704-
model_name_override=group.name,
705-
extra_tags=[group_tag],
706-
created_by="routing_group",
707-
strip_unique_id=True,
834+
group.name,
835+
fallback_models,
708836
)
709-
stats["added"] += 1
837+
stats["fallbacks_updated"] += 1
710838
except Exception as exc:
711839
stats["errors"] += 1
712-
logger.warning(
713-
"Failed pushing routing target %s/%s for group %s: %s",
714-
provider.name,
715-
model.model_id,
840+
logger.warning("Failed updating fallback config for group %s: %s", group.name, exc)
841+
else:
842+
try:
843+
await _clear_group_fallbacks(
844+
client,
845+
config.litellm_base_url,
846+
config.litellm_api_key,
716847
group.name,
717-
exc,
718848
)
849+
stats["fallbacks_deleted"] += 1
850+
except Exception as exc:
851+
stats["errors"] += 1
852+
logger.warning("Failed clearing fallback config for empty group %s: %s", group.name, exc)
719853

720854
return stats

frontend/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Frontend API and UI service."""
22

3-
__version__ = "0.6.25"
3+
__version__ = "0.6.28"

frontend/routes/routing_groups.py

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Routing group management API routes."""
22
from __future__ import annotations
33

4+
from collections import Counter
5+
46
from fastapi import APIRouter, Depends, HTTPException, Query
57
from pydantic import BaseModel, Field
68
from sqlalchemy import select
@@ -9,7 +11,11 @@
911

1012
from shared.database import get_session
1113
from shared.crud import get_config
12-
from backend.litellm_client import push_routing_groups_to_litellm, list_routing_group_deployments
14+
from backend.litellm_client import (
15+
push_routing_groups_to_litellm,
16+
list_routing_group_deployments,
17+
delete_routing_group_from_litellm,
18+
)
1319
from shared.crud import (
1420
get_routing_groups,
1521
get_routing_group,
@@ -123,7 +129,15 @@ async def create_group(
123129
group = await get_routing_group(session, group.id)
124130
if not group:
125131
raise HTTPException(404, "Routing group not found after create")
126-
return _group_to_dict(group)
132+
response = _group_to_dict(group)
133+
134+
config = await get_config(session)
135+
if config.litellm_base_url:
136+
response["sync"] = await push_routing_groups_to_litellm(session, config, group_id=group.id)
137+
else:
138+
response["sync"] = {"skipped": "LiteLLM destination not configured"}
139+
140+
return response
127141

128142

129143
@router.get("/candidates")
@@ -174,42 +188,57 @@ async def routing_group_status(session: AsyncSession = Depends(get_session)) ->
174188

175189
for group in groups:
176190
db_targets = []
177-
db_keys = set()
191+
db_counts: Counter[str] = Counter()
178192
for target in sorted(group.targets, key=lambda t: (t.priority, t.id)):
179193
provider_name = target.provider.name if target.provider else None
194+
weight = max(1, int(target.weight or 1))
180195
db_targets.append(
181196
{
182197
"provider_name": provider_name,
183198
"model_id": target.model_id,
199+
"weight": weight,
184200
"enabled": target.enabled,
185201
}
186202
)
187203
if target.enabled:
188-
db_keys.add(_target_key(provider_name, target.model_id))
204+
db_counts[_target_key(provider_name, target.model_id)] += weight
189205

190206
litellm_targets = litellm_by_group.get(group.name, [])
191-
litellm_keys = {
207+
litellm_counts: Counter[str] = Counter(
192208
_target_key(entry.get("provider"), entry.get("model_id"))
193209
for entry in litellm_targets
194210
if entry.get("provider") and entry.get("model_id")
195-
}
211+
)
196212

197213
missing_in_litellm = [
198-
target for target in db_targets
199-
if target["enabled"] and _target_key(target["provider_name"], target["model_id"]) not in litellm_keys
200-
]
201-
extra_in_litellm = [
202-
entry for entry in litellm_targets
203-
if _target_key(entry.get("provider"), entry.get("model_id")) not in db_keys
214+
{
215+
"provider_name": item["provider_name"],
216+
"model_id": item["model_id"],
217+
"expected": item["weight"],
218+
"actual": litellm_counts.get(_target_key(item["provider_name"], item["model_id"]), 0),
219+
}
220+
for item in db_targets
221+
if item["enabled"]
222+
and litellm_counts.get(_target_key(item["provider_name"], item["model_id"]), 0) < item["weight"]
204223
]
224+
extra_in_litellm = []
225+
for entry in litellm_targets:
226+
key = _target_key(entry.get("provider"), entry.get("model_id"))
227+
expected = db_counts.get(key, 0)
228+
if expected <= 0:
229+
extra_in_litellm.append(entry)
230+
continue
231+
if litellm_counts[key] > expected:
232+
extra_in_litellm.append(entry)
233+
litellm_counts[key] -= 1
205234

206235
response_groups.append(
207236
{
208237
"id": group.id,
209238
"name": group.name,
210239
"description": group.description,
211240
"db_targets": db_targets,
212-
"db_count": len([t for t in db_targets if t["enabled"]]),
241+
"db_count": sum(db_counts.values()),
213242
"litellm_count": len(litellm_targets),
214243
"litellm_targets": litellm_targets,
215244
"missing_in_litellm": missing_in_litellm,
@@ -284,7 +313,13 @@ async def update_group(
284313
except IntegrityError as exc:
285314
raise HTTPException(400, "Invalid routing group payload") from exc
286315

287-
return _group_to_dict(group)
316+
response = _group_to_dict(group)
317+
config = await get_config(session)
318+
if config.litellm_base_url:
319+
response["sync"] = await push_routing_groups_to_litellm(session, config, group_id=group.id)
320+
else:
321+
response["sync"] = {"skipped": "LiteLLM destination not configured"}
322+
return response
288323

289324

290325
@router.delete("/{group_id}")
@@ -293,5 +328,9 @@ async def remove_group(group_id: int, session: AsyncSession = Depends(get_sessio
293328
group = await get_routing_group(session, group_id, include_children=False)
294329
if not group:
295330
raise HTTPException(404, "Routing group not found")
331+
config = await get_config(session)
332+
cleanup = None
333+
if config.litellm_base_url:
334+
cleanup = await delete_routing_group_from_litellm(config, group.name)
296335
await delete_routing_group(session, group)
297-
return {"status": "ok"}
336+
return {"status": "ok", "cleanup": cleanup}

frontend/templates/routing_groups.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,11 +473,11 @@ <h4>${group.name}</h4>
473473
return;
474474
}
475475

476-
summary.textContent = `DB targets (enabled): ${status.db_count} · LiteLLM deployments: ${status.litellm_count}`;
476+
summary.textContent = `Expected deployments: ${status.db_count} · LiteLLM deployments: ${status.litellm_count}`;
477477

478478
if (status.missing_in_litellm.length) {
479479
const missingList = status.missing_in_litellm
480-
.map((item) => `${item.provider_name || "unknown"} / ${item.model_id}`)
480+
.map((item) => `${item.provider_name || "unknown"} / ${item.model_id} (${item.actual}/${item.expected})`)
481481
.join(", ");
482482
missingEl.textContent = `Missing in LiteLLM: ${missingList}`;
483483
} else {

proxy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.6.25"
1+
__version__ = "0.6.28"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "litellm-companion"
3-
version = "0.6.25"
3+
version = "0.6.28"
44
description = "Synchronize models from Ollama or OpenAI-compatible endpoints into LiteLLM"
55
authors = [
66
{name = "LiteLLM Companion Authors", email = "dev@example.com"}

shared/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Shared code between backend and frontend services."""
22

3-
__version__ = "0.6.25"
3+
__version__ = "0.6.28"

0 commit comments

Comments
 (0)