11"""LiteLLM API client for pushing models."""
22import logging
3+ from collections import OrderedDict
34import httpx
45
56from shared .models import ModelMetadata
@@ -220,6 +221,8 @@ async def list_routing_group_deployments(config) -> list[dict]:
220221 "group" : group_name ,
221222 "provider" : _extract_tag_value (tags , "provider:" ) or "" ,
222223 "model_id" : _extract_tag_value (tags , "model:" ) or "" ,
224+ "routing_target" : _extract_tag_value (tags , "routing_target:" ) or "" ,
225+ "routing_slot" : _extract_tag_value (tags , "routing_slot:" ) or "" ,
223226 "model_name" : model .get ("model_name" ),
224227 "model_info_id" : model .get ("model_info" , {}).get ("id" ),
225228 "created_by" : model .get ("model_info" , {}).get ("created_by" ),
@@ -635,6 +638,86 @@ def _merge_pricing_fields(target: dict, source: dict) -> None:
635638 target [key ] = value
636639
637640
async def _set_group_fallbacks(
    client: httpx.AsyncClient,
    base_url: str,
    api_key: str | None,
    group_name: str,
    fallback_models: list[str],
) -> None:
    """Register the "general" fallback list for a routing group model.

    POSTs the group name and its fallback model names to
    ``<base_url>/fallback`` (a trailing slash on ``base_url`` is ignored).

    Args:
        client: Open HTTP client used to send the request.
        base_url: Root URL of the LiteLLM destination.
        api_key: Key handed to ``_make_auth_headers``; may be ``None``.
        group_name: Model name the fallbacks are attached to.
        fallback_models: Fallback model names, sent in the given order.

    Raises:
        httpx.HTTPStatusError: If the response carries an error status.
    """
    endpoint = base_url.rstrip("/") + "/fallback"
    auth_headers = _make_auth_headers(api_key)
    body = dict(
        model=group_name,
        fallback_models=fallback_models,
        fallback_type="general",
    )
    result = await client.post(
        endpoint,
        json=body,
        headers=auth_headers,
        timeout=DEFAULT_TIMEOUT,
    )
    result.raise_for_status()
658+
659+
async def _clear_group_fallbacks(
    client: httpx.AsyncClient,
    base_url: str,
    api_key: str | None,
    group_name: str,
) -> None:
    """Delete general fallback configuration for a routing group model.

    Sends ``DELETE <base_url>/fallback/<group_name>`` with the query
    parameter ``fallback_type=general``. A 404 response is treated as
    success, since there is nothing to delete.

    Args:
        client: Open HTTP client used to send the request.
        base_url: Root URL of the LiteLLM destination.
        api_key: Key handed to ``_make_auth_headers``; may be ``None``.
        group_name: Model name whose fallback configuration is removed.

    Raises:
        httpx.HTTPStatusError: If the response carries an error status
            other than 404.
    """
    # Function-scope import so this block does not depend on the module header.
    from urllib.parse import quote

    # The group name is free-form text. Percent-encode it so characters such
    # as "?", "#", "%" or spaces cannot truncate or alter the request path.
    # "/" stays unescaped (quote's default) so names containing slashes
    # produce the same path as before.
    encoded_name = quote(group_name)
    url = f"{base_url.rstrip('/')}/fallback/{encoded_name}"
    headers = _make_auth_headers(api_key)
    response = await client.delete(
        url,
        params={"fallback_type": "general"},
        headers=headers,
        timeout=DEFAULT_TIMEOUT,
    )
    if response.status_code == 404:
        # No fallback configured for this group: nothing to do.
        return
    response.raise_for_status()
678+
679+
async def delete_routing_group_from_litellm(config, group_name: str) -> dict:
    """Delete all LiteLLM deployments/fallbacks for one routing group.

    Fetches the current LiteLLM models and deletes every entry that both
    carries the lower-cased ``routing_group:<group_name>`` tag (looked up in
    the tags returned by ``_collect_litellm_tags``) and has
    ``model_info.created_by == "routing_group"``. Afterwards the group's
    fallback configuration is cleared. Failures of individual deletions and
    of the fallback clear are logged and counted, not raised; a failure while
    fetching the model list propagates to the caller.

    Args:
        config: Settings object providing ``litellm_base_url`` and
            ``litellm_api_key``.
        group_name: Name of the routing group to remove.

    Returns:
        A dict with the counters ``deleted``, ``errors`` and
        ``fallback_deleted`` (1 when the fallback-clear call completed
        without raising, otherwise 0).

    Raises:
        RuntimeError: If ``config.litellm_base_url`` is not set.
    """
    if not config.litellm_base_url:
        raise RuntimeError("LiteLLM destination not configured")

    group_tag = f"routing_group:{group_name}".lower()
    stats = {"deleted": 0, "errors": 0, "fallback_deleted": 0}

    async with httpx.AsyncClient() as client:
        litellm_models = await fetch_litellm_models(client, config.litellm_base_url, config.litellm_api_key)
        for model in litellm_models:
            tags = _collect_litellm_tags(model)
            if group_tag not in tags:
                continue
            # "model_info" may be present but null; fall back to an empty
            # dict so one malformed entry cannot abort the whole cleanup
            # (and skip the fallback clear below) with an AttributeError.
            model_info = model.get("model_info") or {}
            if model_info.get("created_by") != "routing_group":
                # Only touch entries this tool created itself.
                continue
            model_id = model_info.get("id")
            if not model_id:
                continue
            try:
                await delete_model_from_litellm(
                    client,
                    config.litellm_base_url,
                    config.litellm_api_key,
                    model_id,
                )
                stats["deleted"] += 1
            except Exception as exc:
                # Best effort: keep deleting the remaining entries.
                stats["errors"] += 1
                logger.warning("Failed deleting routing group entry %s: %s", model_id, exc)

        try:
            await _clear_group_fallbacks(client, config.litellm_base_url, config.litellm_api_key, group_name)
            stats["fallback_deleted"] = 1
        except Exception as exc:
            stats["errors"] += 1
            logger.warning("Failed deleting fallback config for %s: %s", group_name, exc)

    return stats
719+
720+
638721async def push_routing_groups_to_litellm (session , config , group_id : int | None = None ) -> dict :
639722 """Push routing groups to LiteLLM as model groups."""
640723 if not config .litellm_base_url :
@@ -650,14 +733,25 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
650733 groups = [group ] if group else []
651734
652735 groups = [g for g in groups if g is not None ]
653- stats = {"groups" : len (groups ), "added" : 0 , "deleted" : 0 , "missing_models" : 0 , "errors" : 0 }
736+ stats = {
737+ "groups" : len (groups ),
738+ "added" : 0 ,
739+ "deleted" : 0 ,
740+ "missing_models" : 0 ,
741+ "errors" : 0 ,
742+ "fallbacks_updated" : 0 ,
743+ "fallbacks_deleted" : 0 ,
744+ "targets_skipped_disabled" : 0 ,
745+ }
654746
655747 async with httpx .AsyncClient () as client :
656748 litellm_models = await fetch_litellm_models (client , config .litellm_base_url , config .litellm_api_key )
657749
658750 for group in groups :
659751 group_tag = f"routing_group:{ group .name } "
660752 group_tag_lower = group_tag .lower ()
753+ fallback_models : list [str ] = []
754+ seen_fallback_models : OrderedDict [str , bool ] = OrderedDict ()
661755
662756 for m in litellm_models :
663757 tags = m .get ("litellm_params" , {}).get ("tags" , [])
@@ -684,6 +778,9 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
684778 logger .warning ("Failed deleting routing group entry %s: %s" , model_id , exc )
685779
686780 for target in sorted (group .targets , key = lambda t : (t .priority , t .id )):
781+ if not target .enabled :
782+ stats ["targets_skipped_disabled" ] += 1
783+ continue
687784 provider = target .provider or await get_provider_by_id (session , target .provider_id )
688785 if not provider :
689786 stats ["missing_models" ] += 1
@@ -692,29 +789,66 @@ async def push_routing_groups_to_litellm(session, config, group_id: int | None =
692789 if not model :
693790 stats ["missing_models" ] += 1
694791 continue
792+
793+ fallback_name = model .get_display_name (apply_prefix = True )
794+ if fallback_name and fallback_name not in seen_fallback_models :
795+ seen_fallback_models [fallback_name ] = True
796+ fallback_models .append (fallback_name )
797+
798+ slot_count = max (1 , int (target .weight or 1 ))
799+ routing_target_tag = f"routing_target:{ provider .id } :{ model .model_id } "
800+ for slot in range (1 , slot_count + 1 ):
801+ extra_tags = [group_tag , routing_target_tag , f"routing_slot:{ slot } " ]
802+ try :
803+ await push_model_to_litellm (
804+ client ,
805+ config .litellm_base_url ,
806+ config .litellm_api_key ,
807+ provider ,
808+ model ,
809+ config = config ,
810+ session = session ,
811+ model_name_override = group .name ,
812+ extra_tags = extra_tags ,
813+ created_by = "routing_group" ,
814+ strip_unique_id = True ,
815+ )
816+ stats ["added" ] += 1
817+ except Exception as exc :
818+ stats ["errors" ] += 1
819+ logger .warning (
820+ "Failed pushing routing target %s/%s (slot=%s) for group %s: %s" ,
821+ provider .name ,
822+ model .model_id ,
823+ slot ,
824+ group .name ,
825+ exc ,
826+ )
827+
828+ if fallback_models :
695829 try :
696- await push_model_to_litellm (
830+ await _set_group_fallbacks (
697831 client ,
698832 config .litellm_base_url ,
699833 config .litellm_api_key ,
700- provider ,
701- model ,
702- config = config ,
703- session = session ,
704- model_name_override = group .name ,
705- extra_tags = [group_tag ],
706- created_by = "routing_group" ,
707- strip_unique_id = True ,
834+ group .name ,
835+ fallback_models ,
708836 )
709- stats ["added " ] += 1
837+ stats ["fallbacks_updated " ] += 1
710838 except Exception as exc :
711839 stats ["errors" ] += 1
712- logger .warning (
713- "Failed pushing routing target %s/%s for group %s: %s" ,
714- provider .name ,
715- model .model_id ,
840+ logger .warning ("Failed updating fallback config for group %s: %s" , group .name , exc )
841+ else :
842+ try :
843+ await _clear_group_fallbacks (
844+ client ,
845+ config .litellm_base_url ,
846+ config .litellm_api_key ,
716847 group .name ,
717- exc ,
718848 )
849+ stats ["fallbacks_deleted" ] += 1
850+ except Exception as exc :
851+ stats ["errors" ] += 1
852+ logger .warning ("Failed clearing fallback config for empty group %s: %s" , group .name , exc )
719853
720854 return stats
0 commit comments