diff --git a/README.md b/README.md
index b90edcfc..0c69e131 100644
--- a/README.md
+++ b/README.md
@@ -284,13 +284,6 @@ vllm:request_generation_tokens_sum{model="vllm_model",version="1"} 16
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="1"} 0
 ...
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="+Inf"} 1
-# HELP vllm:request_params_best_of Histogram of the best_of request parameter.
-# TYPE vllm:request_params_best_of histogram
-vllm:request_params_best_of_count{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_sum{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="1"} 1
-...
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="+Inf"} 1
 # HELP vllm:request_params_n Histogram of the n request parameter.
 # TYPE vllm:request_params_n histogram
 vllm:request_params_n_count{model="vllm_model",version="1"} 1
diff --git a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py
index 1f8514e3..a5ff8b36 100644
--- a/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py
+++ b/ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py
@@ -189,24 +189,10 @@ def test_custom_sampling_params(self):
             model_name=self.vllm_model_name,
         )
         metrics_dict = self.parse_vllm_metrics()
-        total_prompts = len(self.prompts)
-
-        # vllm:request_params_best_of
-        """
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_count"], total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_sum"], best_of * total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
-        )
-        """
         # vllm:request_params_n
-        self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
-        # self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
-        self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)
+        self.assertIn("vllm:request_params_n_count", metrics_dict)
+        self.assertIn("vllm:request_params_n_sum", metrics_dict)
+        self.assertIn("vllm:request_params_n_bucket", metrics_dict)

     def test_vllm_metrics_disabled(self):
         # Test vLLM metrics
diff --git a/src/model.py b/src/model.py
index 19ff713e..d201244c 100644
--- a/src/model.py
+++ b/src/model.py
@@ -359,10 +359,8 @@ def _setup_metrics(self):
                 "version": self.args["model_version"],
             }
             # Add vLLM custom metrics
-            engine_config = self._llm_engine.engine.model_config
-            self._vllm_metrics = VllmStatLogger(
-                labels, engine_config.max_model_len, self.logger
-            )
+            vllm_config = self._llm_engine.engine.vllm_config
+            self._vllm_metrics = VllmStatLogger(labels, vllm_config, self.logger)
             self._llm_engine.add_logger("triton", self._vllm_metrics)
         except pb_utils.TritonModelException as e:
             if "metrics not supported" in str(e):
diff --git a/src/utils/metrics.py b/src/utils/metrics.py
index c251e941..4f3cfaaa 100644
--- a/src/utils/metrics.py
+++ b/src/utils/metrics.py
@@ -29,6 +29,7 @@
 from typing import Dict, List, Union

 import triton_python_backend_utils as pb_utils
+from vllm.config import VllmConfig
 from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
 from vllm.engine.metrics import Stats as VllmStats
 from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
@@ -163,11 +164,13 @@ def __init__(self, labels: List[str], max_model_len: int):
 class VllmStatLogger(VllmStatLoggerBase):
     """StatLogger is used as an adapter between vLLM stats collector and Triton metrics provider."""

-    def __init__(self, labels: Dict, max_model_len: int, log_logger) -> None:
+    def __init__(self, labels: Dict, vllm_config: VllmConfig, log_logger) -> None:
         # Tracked stats over current local logging interval.
         # local_interval not used here. It's for vLLM logs to stdout.
-        super().__init__(local_interval=0)
-        self.metrics = TritonMetrics(labels, max_model_len)
+        super().__init__(local_interval=0, vllm_config=vllm_config)
+        self.metrics = TritonMetrics(
+            labels=labels, max_model_len=vllm_config.model_config.max_model_len
+        )
         self.log_logger = log_logger

     # Starting the metrics thread. It allows vLLM to keep making progress