Skip to content

Commit ddb90c9

Browse files
[Fix] - Router: add model_name index for O(1) deployment lookups (#15113)
* perf(router): add model_name index for O(1) deployment lookups

  Add a model_name_to_deployment_indices mapping to optimize _get_all_deployments() from an O(n) scan to an O(1) lookup plus O(k) iteration over the matching deployments.

  - Add model_name_to_deployment_indices: Dict[str, List[int]]
  - Add _build_model_name_index() to build/maintain the index
  - Update _add_model_to_list_and_index_map() to maintain both indices
  - Refactor to use idx = len(self.model_list) before append (cleaner)
  - Optimize _get_all_deployments() to use the index instead of a linear scan

* test(router): add test coverage for _build_model_name_index

  Add a single comprehensive test for _build_model_name_index() to fix the code coverage CI failure. The test verifies that the index:

  - Correctly maps model_name to deployment indices
  - Handles multiple deployments per model_name
  - Is cleared and rebuilt correctly

  Fixes: CI code coverage error for _build_model_name_index
1 parent 9383ffc commit ddb90c9

File tree

2 files changed

+106
-17
lines changed

2 files changed

+106
-17
lines changed

litellm/router.py

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,9 @@ def __init__( # noqa: PLR0915
416416

417417
# Initialize model ID to deployment index mapping for O(1) lookups
418418
self.model_id_to_deployment_index_map: Dict[str, int] = {}
419+
# Initialize model name to deployment indices mapping for O(1) lookups
420+
# Maps model_name -> list of indices in model_list
421+
self.model_name_to_deployment_indices: Dict[str, List[int]] = {}
419422

420423
if model_list is not None:
421424
# Build model index immediately to enable O(1) lookups from the start
@@ -5097,6 +5100,7 @@ def set_model_list(self, model_list: list):
50975100
original_model_list = copy.deepcopy(model_list)
50985101
self.model_list = []
50995102
self.model_id_to_deployment_index_map = {} # Reset the index
5103+
self.model_name_to_deployment_indices = {} # Reset the model_name index
51005104
# we add api_base/api_key each model so load balancing between azure/gpt on api_base1 and api_base2 works
51015105

51025106
for model in original_model_list:
@@ -5138,6 +5142,9 @@ def set_model_list(self, model_list: list):
51385142
f"\nInitialized Model List {self.get_model_names()}"
51395143
)
51405144
self.model_names = [m["model_name"] for m in model_list]
5145+
5146+
# Build model_name index for O(1) lookups
5147+
self._build_model_name_index(self.model_list)
51415148

51425149
def _add_deployment(self, deployment: Deployment) -> Deployment:
51435150
import os
@@ -5365,20 +5372,27 @@ def _add_model_to_list_and_index_map(
53655372
self, model: dict, model_id: Optional[str] = None
53665373
) -> None:
53675374
"""
5368-
Helper method to add a model to the model_list and update the model_id_to_deployment_index_map.
5375+
Helper method to add a model to the model_list and update both indices.
53695376
53705377
Parameters:
53715378
- model: dict - the model to add to the list
53725379
- model_id: Optional[str] - the model ID to use for indexing. If None, will try to get from model["model_info"]["id"]
53735380
"""
5381+
idx = len(self.model_list)
53745382
self.model_list.append(model)
5375-
# Update model index for O(1) lookup
5383+
5384+
# Update model_id index for O(1) lookup
53765385
if model_id is not None:
5377-
self.model_id_to_deployment_index_map[model_id] = len(self.model_list) - 1
5386+
self.model_id_to_deployment_index_map[model_id] = idx
53785387
elif model.get("model_info", {}).get("id") is not None:
5379-
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = (
5380-
len(self.model_list) - 1
5381-
)
5388+
self.model_id_to_deployment_index_map[model["model_info"]["id"]] = idx
5389+
5390+
# Update model_name index for O(1) lookup
5391+
model_name = model.get("model_name")
5392+
if model_name:
5393+
if model_name not in self.model_name_to_deployment_indices:
5394+
self.model_name_to_deployment_indices[model_name] = []
5395+
self.model_name_to_deployment_indices[model_name].append(idx)
53825396

53835397
def upsert_deployment(self, deployment: Deployment) -> Optional[Deployment]:
53845398
"""
@@ -6094,6 +6108,22 @@ async def set_response_headers(
60946108
additional_headers[header] = value
60956109
return response
60966110

6111+
def _build_model_name_index(self, model_list: list) -> None:
6112+
"""
6113+
Build model_name -> deployment indices mapping for O(1) lookups.
6114+
6115+
This index allows us to find all deployments for a given model_name in O(1) time
6116+
instead of O(n) linear scan through the entire model_list.
6117+
"""
6118+
self.model_name_to_deployment_indices.clear()
6119+
6120+
for idx, model in enumerate(model_list):
6121+
model_name = model.get("model_name")
6122+
if model_name:
6123+
if model_name not in self.model_name_to_deployment_indices:
6124+
self.model_name_to_deployment_indices[model_name] = []
6125+
self.model_name_to_deployment_indices[model_name].append(idx)
6126+
60976127
def _build_model_id_to_deployment_index_map(self, model_list: list):
60986128
"""
60996129
Build model index from model list to enable O(1) lookups immediately.
@@ -6198,18 +6228,27 @@ def _get_all_deployments(
61986228
Used for accurate 'get_model_list'.
61996229
62006230
if team_id specified, only return team-specific models
6231+
6232+
Optimized with O(1) index lookup instead of O(n) linear scan.
62016233
"""
62026234
returned_models: List[DeploymentTypedDict] = []
6203-
for model in self.model_list:
6204-
if self.should_include_deployment(
6205-
model_name=model_name, model=model, team_id=team_id
6206-
):
6207-
if model_alias is not None:
6208-
alias_model = copy.deepcopy(model)
6209-
alias_model["model_name"] = model_alias
6210-
returned_models.append(alias_model)
6211-
else:
6212-
returned_models.append(model)
6235+
6236+
# O(1) lookup in model_name index
6237+
if model_name in self.model_name_to_deployment_indices:
6238+
indices = self.model_name_to_deployment_indices[model_name]
6239+
6240+
# O(k) where k = deployments for this model_name (typically 1-10)
6241+
for idx in indices:
6242+
model = self.model_list[idx]
6243+
if self.should_include_deployment(
6244+
model_name=model_name, model=model, team_id=team_id
6245+
):
6246+
if model_alias is not None:
6247+
alias_model = copy.deepcopy(model)
6248+
alias_model["model_name"] = model_alias
6249+
returned_models.append(alias_model)
6250+
else:
6251+
returned_models.append(model)
62136252

62146253
return returned_models
62156254

tests/router_unit_tests/test_router_index_management.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ def test_add_model_to_list_and_index_map_from_model_info(self, router):
7777
# Verify: Index map uses model_info.id
7878
assert router.model_id_to_deployment_index_map["model-info-id"] == 0
7979

80-
8180
def test_add_model_to_list_and_index_map_multiple_models(self, router):
8281
"""Test _add_model_to_list_and_index_map with multiple models to verify indexing"""
8382
# Setup: Empty router
@@ -127,3 +126,54 @@ def test_has_model_id(self, router):
127126
# Test: Empty router
128127
empty_router = Router(model_list=[])
129128
assert empty_router.has_model_id("any-id") == False
129+
130+
def test_build_model_name_index(self, router):
131+
"""Test _build_model_name_index function"""
132+
model_list = [
133+
{
134+
"model_name": "gpt-3.5-turbo",
135+
"litellm_params": {"model": "gpt-3.5-turbo"},
136+
"model_info": {"id": "model-1"},
137+
},
138+
{
139+
"model_name": "gpt-4",
140+
"litellm_params": {"model": "gpt-4"},
141+
"model_info": {"id": "model-2"},
142+
},
143+
{
144+
"model_name": "gpt-4", # Duplicate model_name, different deployment
145+
"litellm_params": {"model": "gpt-4"},
146+
"model_info": {"id": "model-3"},
147+
},
148+
]
149+
150+
# Test: Build index from model list
151+
router._build_model_name_index(model_list)
152+
153+
# Verify: model_name_to_deployment_indices is correctly built
154+
assert "gpt-3.5-turbo" in router.model_name_to_deployment_indices
155+
assert "gpt-4" in router.model_name_to_deployment_indices
156+
157+
# Verify: gpt-3.5-turbo has single deployment
158+
assert router.model_name_to_deployment_indices["gpt-3.5-turbo"] == [0]
159+
160+
# Verify: gpt-4 has multiple deployments
161+
assert router.model_name_to_deployment_indices["gpt-4"] == [1, 2]
162+
163+
# Test: Rebuild index (should clear and rebuild)
164+
new_model_list = [
165+
{
166+
"model_name": "claude-3",
167+
"litellm_params": {"model": "claude-3"},
168+
"model_info": {"id": "model-4"},
169+
},
170+
]
171+
router._build_model_name_index(new_model_list)
172+
173+
# Verify: Old entries are cleared
174+
assert "gpt-3.5-turbo" not in router.model_name_to_deployment_indices
175+
assert "gpt-4" not in router.model_name_to_deployment_indices
176+
177+
# Verify: New entry is added
178+
assert "claude-3" in router.model_name_to_deployment_indices
179+
assert router.model_name_to_deployment_indices["claude-3"] == [0]

0 commit comments

Comments
 (0)