@@ -39,6 +39,44 @@ class ServiceDiscoveryType(enum.Enum):
3939 K8S = "k8s"
4040
4141
@dataclass
class ModelInfo:
    """Information about a model including its relationships and metadata.

    Instances can be built from a plain dictionary with ``from_dict`` and
    turned back into one with ``to_dict``; the two are inverses for any
    instance whose fields are all set.
    """

    # Model identifier (the "id" key of the source dictionary).
    id: str
    # Object type tag; ``from_dict`` falls back to "model" when absent.
    object: str
    # Creation timestamp; ``from_dict`` falls back to int(time.time()).
    created: int = 0
    # Owner label of the model.
    owned_by: str = "vllm"
    # Value of the "root" key, if any.
    root: Optional[str] = None
    # ID of the parent model; a non-None parent marks the model as an adapter
    # when no explicit flag is given to ``from_dict``.
    parent: Optional[str] = None
    # Whether this model is an adapter.
    is_adapter: bool = False

    @classmethod
    def from_dict(cls, data: Dict) -> "ModelInfo":
        """Create a ModelInfo instance from a dictionary.

        Missing keys and keys explicitly set to ``None`` are treated alike for
        ``object``, ``created`` and ``owned_by``: the default is used.

        Args:
            data: Mapping with the optional keys "id", "object", "created",
                "owned_by", "root", "parent" and "is_adapter".

        Returns:
            A new ModelInfo. ``id`` is ``None`` if the key is absent (no
            validation is performed here). ``is_adapter`` honours an explicit
            "is_adapter" entry and otherwise is ``True`` exactly when
            "parent" is not ``None``.
        """

        def _value_or(key, fallback):
            # dict.get only falls back on a *missing* key; also cover None so
            # a null value cannot end up in a non-Optional field.
            value = data.get(key)
            return fallback if value is None else value

        parent = data.get("parent")

        # Only read the clock when the source did not supply a timestamp.
        created = data.get("created")
        if created is None:
            created = int(time.time())

        # Prefer an explicit flag so to_dict()/from_dict() round-trips; fall
        # back to deriving it from the presence of a parent.
        explicit_flag = data.get("is_adapter")
        if explicit_flag is None:
            is_adapter = parent is not None
        else:
            is_adapter = bool(explicit_flag)

        return cls(
            id=data.get("id"),
            object=_value_or("object", "model"),
            created=created,
            owned_by=_value_or("owned_by", "vllm"),
            root=data.get("root"),
            parent=parent,
            is_adapter=is_adapter,
        )

    def to_dict(self) -> Dict:
        """Convert the ModelInfo instance to a dictionary.

        Returns:
            A new dictionary with the keys "id", "object", "created",
            "owned_by", "root", "parent" and "is_adapter".
        """
        return {
            "id": self.id,
            "object": self.object,
            "created": self.created,
            "owned_by": self.owned_by,
            "root": self.root,
            "parent": self.parent,
            "is_adapter": self.is_adapter,
        }
78+
79+
4280@dataclass
4381class EndpointInfo :
4482 # Endpoint's url
@@ -63,7 +101,7 @@ class EndpointInfo:
63101 namespace : Optional [str ] = None
64102
65103 # Model information including relationships
66- model_info : Dict [str , Dict ] = None
104+ model_info : Dict [str , ModelInfo ] = None
67105
68106 def __str__ (self ):
69107 return f"EndpointInfo(url={ self .url } , model_names={ self .model_names } , added_timestamp={ self .added_timestamp } , model_label={ self .model_label } , pod_name={ self .pod_name } , namespace={ self .namespace } )"
@@ -75,9 +113,7 @@ def get_base_models(self) -> List[str]:
75113 if not self .model_info :
76114 return []
77115 return [
78- model_id
79- for model_id , info in self .model_info .items ()
80- if not info .get ("parent" )
116+ model_id for model_id , info in self .model_info .items () if not info .parent
81117 ]
82118
83119 def get_adapters (self ) -> List [str ]:
@@ -86,9 +122,7 @@ def get_adapters(self) -> List[str]:
86122 """
87123 if not self .model_info :
88124 return []
89- return [
90- model_id for model_id , info in self .model_info .items () if info .get ("parent" )
91- ]
125+ return [model_id for model_id , info in self .model_info .items () if info .parent ]
92126
93127 def get_adapters_for_model (self , base_model : str ) -> List [str ]:
94128 """
@@ -105,7 +139,7 @@ def get_adapters_for_model(self, base_model: str) -> List[str]:
105139 return [
106140 model_id
107141 for model_id , info in self .model_info .items ()
108- if info .get ( " parent" ) == base_model
142+ if info .parent == base_model
109143 ]
110144
111145 def has_model (self , model_id : str ) -> bool :
@@ -120,15 +154,15 @@ def has_model(self, model_id: str) -> bool:
120154 """
121155 return model_id in self .model_names
122156
123- def get_model_info (self , model_id : str ) -> Optional [Dict ]:
157+ def get_model_info (self , model_id : str ) -> Optional [ModelInfo ]:
124158 """
125159 Get detailed information about a specific model.
126160
127161 Args:
128162 model_id: The ID of the model to get information for
129163
130164 Returns:
131- Dictionary containing model information if available, None otherwise
165+ ModelInfo object containing model information if available, None otherwise
132166 """
133167 if not self .model_info :
134168 return None
@@ -219,7 +253,7 @@ def start_health_check_task(self) -> None:
219253 def get_model_endpoint_hash (self , url : str , model : str ) -> str :
220254 return hashlib .md5 (f"{ url } { model } " .encode ()).hexdigest ()
221255
222- def _get_model_info (self , model : str ) -> Dict [str , Dict ]:
256+ def _get_model_info (self , model : str ) -> Dict [str , ModelInfo ]:
223257 """
224258 Get detailed model information. For static serving engines, we don't query the engine, instead we use predefined
225259 static model info.
@@ -231,13 +265,15 @@ def _get_model_info(self, model: str) -> Dict[str, Dict]:
231265 Dictionary mapping model IDs to their information, including parent-child relationships
232266 """
233267 return {
234- model : {
235- "id" : model ,
236- "object" : "model" ,
237- "owned_by" : "vllm" ,
238- "parent" : None ,
239- "is_adapter" : False ,
240- }
268+ model : ModelInfo (
269+ id = model ,
270+ object = "model" ,
271+ owned_by = "vllm" ,
272+ parent = None ,
273+ is_adapter = False ,
274+ root = None ,
275+ created = int (time .time ()),
276+ )
241277 }
242278
243279 def get_endpoint_info (self ) -> List [EndpointInfo ]:
@@ -373,15 +409,15 @@ def _get_model_names(self, pod_ip) -> List[str]:
373409 logger .error (f"Failed to get model names from { url } : { e } " )
374410 return []
375411
376- def _get_model_info (self , pod_ip ) -> Dict [str , Dict ]:
412+ def _get_model_info (self , pod_ip ) -> Dict [str , ModelInfo ]:
377413 """
378414 Get detailed model information from the serving engine pod.
379415
380416 Args:
381417 pod_ip: the IP address of the pod
382418
383419 Returns:
384- Dictionary mapping model IDs to their information , including parent-child relationships
420+ Dictionary mapping model IDs to their ModelInfo objects , including parent-child relationships
385421 """
386422 url = f"http://{ pod_ip } :{ self .port } /v1/models"
387423 try :
@@ -392,20 +428,11 @@ def _get_model_info(self, pod_ip) -> Dict[str, Dict]:
392428 response = requests .get (url , headers = headers )
393429 response .raise_for_status ()
394430 models = response .json ()["data" ]
395-
396431 # Create a dictionary of model information
397432 model_info = {}
398433 for model in models :
399434 model_id = model ["id" ]
400- model_info [model_id ] = {
401- "id" : model_id ,
402- "object" : model ["object" ],
403- "created" : model ["created" ],
404- "owned_by" : model ["owned_by" ],
405- "root" : model ["root" ],
406- "parent" : model .get ("parent" ),
407- "is_adapter" : model .get ("parent" ) is not None ,
408- }
435+ model_info [model_id ] = ModelInfo .from_dict (model )
409436
410437 return model_info
411438 except Exception as e :
@@ -480,6 +507,7 @@ def _add_engine(
480507 model_label = model_label ,
481508 pod_name = engine_name ,
482509 namespace = self .namespace ,
510+ model_info = model_info ,
483511 )
484512 if (
485513 self .prefill_model_labels is not None
0 commit comments