@@ -109,20 +109,6 @@ def initialize(self, args):
109109 )
110110 self .output_dtype = pb_utils .triton_string_to_numpy (output_config ["data_type" ])
111111
112- # Create vLLM custom metrics
113- try :
114- labels = {
115- "model" : self .args ["model_name" ],
116- "version" : self .args ["model_version" ],
117- }
118- self .metrics = VllmStatLogger (labels = labels )
119- except pb_utils .TritonModelException as e :
120- if "metrics not supported" in str (e ):
121- # Metrics are disabled at the server
122- self .metrics = None
123- else :
124- raise e
125-
126112 # Prepare vLLM engine
127113 self .init_engine ()
128114
@@ -163,13 +149,28 @@ def init_engine(self):
163149 self .setup_lora ()
164150
165151 # Create an AsyncLLMEngine from the config from JSON
166- self .llm_engine = AsyncLLMEngine .from_engine_args (
167- AsyncEngineArgs (** self .vllm_engine_config )
168- )
152+ aync_engine_args = AsyncEngineArgs (** self .vllm_engine_config )
153+ self .llm_engine = AsyncLLMEngine .from_engine_args (aync_engine_args )
169154
170- # Add vLLM custom metrics
171- if self .metrics :
172- self .llm_engine .add_logger ("triton" , self .metrics )
155+ # Create vLLM custom metrics
156+ if not aync_engine_args .disable_log_stats :
157+ try :
158+ labels = {
159+ "model" : self .args ["model_name" ],
160+ "version" : self .args ["model_version" ],
161+ }
162+ self .metrics = VllmStatLogger (labels = labels )
163+ except pb_utils .TritonModelException as e :
164+ if "metrics not supported" in str (e ):
165+ # Metrics are disabled at the server
166+ self .metrics = None
167+ self .logger .log_info ("[vllm] Metrics not supported" )
168+ else :
169+ raise e
170+
171+ # Add vLLM custom metrics
172+ if self .metrics :
173+ self .llm_engine .add_logger ("triton" , self .metrics )
173174
174175 def setup_lora (self ):
175176 self .enable_lora = False