Skip to content

Commit d05a4ce

Browse files
authored
[BugFix] Fix v1/models in static discovery (#492)
* [FEAT][lora] lora controller with create and delete Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [FEAT][lora] add pod watch Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [Feat][lora] add router api support Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * pre-commit Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * fix conflict Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [BugFix] Fix /v1/models endpoint Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [BugFix] Fix /v1/models endpoint Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [BugFix] Fix /v1/models endpoint Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> * [BugFix] Fix /v1/models endpoint Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> --------- Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com> Co-authored-by: Rui Zhang <zrfishnoodles@gmail.com>
1 parent d14cb7c commit d05a4ce

File tree

3 files changed

+64
-37
lines changed

3 files changed

+64
-37
lines changed

src/vllm_router/routers/main_router.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,9 @@ async def show_models():
134134
model_card = ModelCard(
135135
id=model_id,
136136
object="model",
137-
created=model_info["created"],
138-
owned_by=model_info["owned_by"],
139-
parent=model_info["parent"],
137+
created=model_info.created,
138+
owned_by=model_info.owned_by,
139+
parent=model_info.parent,
140140
)
141141
model_cards.append(model_card)
142142
existing_models.add(model_id)
@@ -154,7 +154,7 @@ async def get_engine_instances():
154154
None
155155
156156
Returns:
157-
JSONResponse: A JSON response containing the list of models.
157+
JSONResponse: A JSON response containing the list of models and their relationships.
158158
159159
Raises:
160160
Exception: If there is an error in retrieving the endpoint information.

src/vllm_router/service_discovery.py

Lines changed: 58 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,44 @@ class ServiceDiscoveryType(enum.Enum):
3939
K8S = "k8s"
4040

4141

42+
@dataclass
class ModelInfo:
    """Information about a model including its relationships and metadata.

    Field defaults mirror the values used by ``from_dict`` except for
    ``created``: constructing the class directly defaults it to ``0``, while
    ``from_dict`` stamps the current time when no value is supplied.
    """

    id: str
    object: str
    created: int = 0
    owned_by: str = "vllm"
    root: Optional[str] = None
    # ID of the parent model; a non-None parent marks this entry as an adapter.
    parent: Optional[str] = None
    is_adapter: bool = False

    @classmethod
    def from_dict(cls, data: Dict) -> "ModelInfo":
        """Create a ModelInfo instance from a dictionary.

        A key that is absent and a key whose value is ``None`` (for example a
        JSON ``null``) are treated the same way for ``object``, ``created``
        and ``owned_by``: the default is used, so those fields never end up
        holding ``None``.

        Args:
            data: Mapping with the keys ``id``, ``object``, ``created``,
                ``owned_by``, ``root`` and ``parent``; all are read with
                ``dict.get``. Any ``is_adapter`` entry is ignored because the
                flag is derived from ``parent``.

        Returns:
            A new ModelInfo whose ``is_adapter`` is True exactly when
            ``parent`` is not None.
        """

        def _value_or(key: str, fallback):
            # dict.get(key, default) only falls back when the key is absent;
            # an explicit None would otherwise leak into an int/str field.
            value = data.get(key)
            return fallback if value is None else value

        created = data.get("created")
        parent = data.get("parent")
        return cls(
            # NOTE(review): a missing "id" still yields id=None, as before;
            # callers are expected to supply it - confirm whether this should
            # raise instead.
            id=data.get("id"),
            object=_value_or("object", "model"),
            # Only read the clock when no creation time was supplied.
            created=int(time.time()) if created is None else created,
            owned_by=_value_or("owned_by", "vllm"),
            root=data.get("root"),
            parent=parent,
            is_adapter=parent is not None,
        )

    def to_dict(self) -> Dict:
        """Convert the ModelInfo instance to a dictionary.

        Returns:
            A new dict with the keys ``id``, ``object``, ``created``,
            ``owned_by``, ``root``, ``parent`` and ``is_adapter``.
        """
        return {
            "id": self.id,
            "object": self.object,
            "created": self.created,
            "owned_by": self.owned_by,
            "root": self.root,
            "parent": self.parent,
            "is_adapter": self.is_adapter,
        }
78+
79+
4280
@dataclass
4381
class EndpointInfo:
4482
# Endpoint's url
@@ -63,7 +101,7 @@ class EndpointInfo:
63101
namespace: Optional[str] = None
64102

65103
# Model information including relationships
66-
model_info: Dict[str, Dict] = None
104+
model_info: Dict[str, ModelInfo] = None
67105

68106
def __str__(self):
69107
return f"EndpointInfo(url={self.url}, model_names={self.model_names}, added_timestamp={self.added_timestamp}, model_label={self.model_label}, pod_name={self.pod_name}, namespace={self.namespace})"
@@ -75,9 +113,7 @@ def get_base_models(self) -> List[str]:
75113
if not self.model_info:
76114
return []
77115
return [
78-
model_id
79-
for model_id, info in self.model_info.items()
80-
if not info.get("parent")
116+
model_id for model_id, info in self.model_info.items() if not info.parent
81117
]
82118

83119
def get_adapters(self) -> List[str]:
@@ -86,9 +122,7 @@ def get_adapters(self) -> List[str]:
86122
"""
87123
if not self.model_info:
88124
return []
89-
return [
90-
model_id for model_id, info in self.model_info.items() if info.get("parent")
91-
]
125+
return [model_id for model_id, info in self.model_info.items() if info.parent]
92126

93127
def get_adapters_for_model(self, base_model: str) -> List[str]:
94128
"""
@@ -105,7 +139,7 @@ def get_adapters_for_model(self, base_model: str) -> List[str]:
105139
return [
106140
model_id
107141
for model_id, info in self.model_info.items()
108-
if info.get("parent") == base_model
142+
if info.parent == base_model
109143
]
110144

111145
def has_model(self, model_id: str) -> bool:
@@ -120,15 +154,15 @@ def has_model(self, model_id: str) -> bool:
120154
"""
121155
return model_id in self.model_names
122156

123-
def get_model_info(self, model_id: str) -> Optional[Dict]:
157+
def get_model_info(self, model_id: str) -> Optional[ModelInfo]:
124158
"""
125159
Get detailed information about a specific model.
126160
127161
Args:
128162
model_id: The ID of the model to get information for
129163
130164
Returns:
131-
Dictionary containing model information if available, None otherwise
165+
ModelInfo object containing model information if available, None otherwise
132166
"""
133167
if not self.model_info:
134168
return None
@@ -219,7 +253,7 @@ def start_health_check_task(self) -> None:
219253
def get_model_endpoint_hash(self, url: str, model: str) -> str:
220254
return hashlib.md5(f"{url}{model}".encode()).hexdigest()
221255

222-
def _get_model_info(self, model: str) -> Dict[str, Dict]:
256+
def _get_model_info(self, model: str) -> Dict[str, ModelInfo]:
223257
"""
224258
Get detailed model information. For static serving engines, we don't query the engine, instead we use predefined
225259
static model info.
@@ -231,13 +265,15 @@ def _get_model_info(self, model: str) -> Dict[str, Dict]:
231265
Dictionary mapping model IDs to their information, including parent-child relationships
232266
"""
233267
return {
234-
model: {
235-
"id": model,
236-
"object": "model",
237-
"owned_by": "vllm",
238-
"parent": None,
239-
"is_adapter": False,
240-
}
268+
model: ModelInfo(
269+
id=model,
270+
object="model",
271+
owned_by="vllm",
272+
parent=None,
273+
is_adapter=False,
274+
root=None,
275+
created=int(time.time()),
276+
)
241277
}
242278

243279
def get_endpoint_info(self) -> List[EndpointInfo]:
@@ -373,15 +409,15 @@ def _get_model_names(self, pod_ip) -> List[str]:
373409
logger.error(f"Failed to get model names from {url}: {e}")
374410
return []
375411

376-
def _get_model_info(self, pod_ip) -> Dict[str, Dict]:
412+
def _get_model_info(self, pod_ip) -> Dict[str, ModelInfo]:
377413
"""
378414
Get detailed model information from the serving engine pod.
379415
380416
Args:
381417
pod_ip: the IP address of the pod
382418
383419
Returns:
384-
Dictionary mapping model IDs to their information, including parent-child relationships
420+
Dictionary mapping model IDs to their ModelInfo objects, including parent-child relationships
385421
"""
386422
url = f"http://{pod_ip}:{self.port}/v1/models"
387423
try:
@@ -392,20 +428,11 @@ def _get_model_info(self, pod_ip) -> Dict[str, Dict]:
392428
response = requests.get(url, headers=headers)
393429
response.raise_for_status()
394430
models = response.json()["data"]
395-
396431
# Create a dictionary of model information
397432
model_info = {}
398433
for model in models:
399434
model_id = model["id"]
400-
model_info[model_id] = {
401-
"id": model_id,
402-
"object": model["object"],
403-
"created": model["created"],
404-
"owned_by": model["owned_by"],
405-
"root": model["root"],
406-
"parent": model.get("parent"),
407-
"is_adapter": model.get("parent") is not None,
408-
}
435+
model_info[model_id] = ModelInfo.from_dict(model)
409436

410437
return model_info
411438
except Exception as e:
@@ -480,6 +507,7 @@ def _add_engine(
480507
model_label=model_label,
481508
pod_name=engine_name,
482509
namespace=self.namespace,
510+
model_info=model_info,
483511
)
484512
if (
485513
self.prefill_model_labels is not None

src/vllm_router/stats/log_stats.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,11 @@ def log_stats(app: FastAPI, interval: int = 10):
6060
logstr += "Models:\n"
6161
for model_id, model_info in endpoint.model_info.items():
6262
logstr += f" - {model_id}"
63-
if model_info.get("parent"):
64-
logstr += f" (adapter for {model_info['parent']})"
63+
if model_info.parent:
64+
logstr += f" (adapter for {model_info.parent})"
6565
logstr += "\n"
6666
else:
6767
logstr += "Models: No model information available\n"
68-
6968
if url in engine_stats:
7069
es = engine_stats[url]
7170
logstr += (

0 commit comments

Comments
 (0)