@@ -172,8 +172,13 @@ def initialize(self, args):
172172 )
173173 self ._is_healthy = True
174174
175+ # Initialize engine arguments
176+ # TODO: Move this call into _init_engine() once the metrics-enabled check has been moved.
177+ self ._init_engine_args ()
178+
175179 # Check if metrics are enabled. The ZMQ process cannot be used when metrics are
176180 # enabled.
181+ # TODO: Move the check into _setup_metrics().
177182 self ._enable_metrics = (
178183 self ._get_bool_config_param ("REPORT_CUSTOM_METRICS" )
179184 and not self ._aync_engine_args .disable_log_stats
@@ -191,7 +196,7 @@ def initialize(self, args):
191196 self ._response_thread = threading .Thread (target = self ._response_loop )
192197 self ._response_thread .start ()
193198
194- def _init_engine (self ):
199+ def _init_engine_args (self ):
195200 # Currently, Triton needs to use decoupled policy for asynchronously
196201 # forwarding requests to vLLM engine, so assert it.
197202 self .using_decoupled = pb_utils .using_decoupled_model_transaction_policy (
@@ -219,6 +224,7 @@ def _init_engine(self):
219224 # Create an AsyncEngineArgs from the config from JSON
220225 self ._aync_engine_args = AsyncEngineArgs (** self .vllm_engine_config )
221226
227+ def _init_engine (self ):
222228 # Run the engine in a separate thread running the AsyncIO event loop.
223229 self ._llm_engine = None
224230 self ._llm_engine_start_cv = threading .Condition ()
0 commit comments