@@ -31,6 +31,8 @@ class ModelConfig:
31
31
32
32
Args:
33
33
model: Name or path of the huggingface model to use.
34
+ It is also used as the content for `model_name` tag in metrics
35
+ output when `served_model_name` is not specified.
34
36
tokenizer: Name or path of the huggingface tokenizer to use.
35
37
tokenizer_mode: Tokenizer mode. "auto" will use the fast tokenizer if
36
38
available, and "slow" will always use the slow tokenizer.
@@ -69,6 +71,10 @@ class ModelConfig:
69
71
to eager mode
70
72
skip_tokenizer_init: If true, skip initialization of tokenizer and
71
73
detokenizer.
74
+ served_model_name: The model name used in metrics tag `model_name`,
75
+ matches the model name exposed via the APIs. If multiple model
76
+ names provided, the first name will be used. If not specified,
77
+ the model name will be the same as `model`.
72
78
"""
73
79
74
80
def __init__ (
@@ -90,6 +96,7 @@ def __init__(
90
96
max_seq_len_to_capture : Optional [int ] = None ,
91
97
max_logprobs : int = 5 ,
92
98
skip_tokenizer_init : bool = False ,
99
+ served_model_name : Optional [Union [str , List [str ]]] = None ,
93
100
) -> None :
94
101
self .model = model
95
102
self .tokenizer = tokenizer
@@ -117,6 +124,8 @@ def __init__(
117
124
self .dtype = _get_and_verify_dtype (self .hf_text_config , dtype )
118
125
self .max_model_len = _get_and_verify_max_len (self .hf_text_config ,
119
126
max_model_len )
127
+ self .served_model_name = get_served_model_name (model ,
128
+ served_model_name )
120
129
if not self .skip_tokenizer_init :
121
130
self ._verify_tokenizer_mode ()
122
131
self ._verify_quantization ()
@@ -1150,6 +1159,22 @@ def _get_and_verify_max_len(
1150
1159
return int (max_model_len )
1151
1160
1152
1161
1162
def get_served_model_name(
        model: str,
        served_model_name: Optional[Union[str, List[str]]]) -> str:
    """Resolve the model name exposed in the metrics tag `model_name`.

    Args:
        model: Name or path of the huggingface model; used as the
            fallback when no served name is provided.
        served_model_name: Optional override. If it is a non-empty list,
            the first entry is used; if it is a non-empty string, it is
            used directly.

    Returns:
        The served model name: `served_model_name` (or its first element
        when given a list), falling back to `model` when
        `served_model_name` is None, an empty string, or an empty list.
    """
    # Falsy covers None, "" and [] — all mean "not specified".
    if not served_model_name:
        return model
    if isinstance(served_model_name, list):
        # Multiple names may be exposed via the API; metrics use the first.
        return served_model_name[0]
    return served_model_name
1176
+
1177
+
1153
1178
@dataclass
1154
1179
class DecodingConfig :
1155
1180
"""Dataclass which contains the decoding strategy of the engine"""
0 commit comments