Merged
README.md: 7 changes (0 additions, 7 deletions)
@@ -284,13 +284,6 @@ vllm:request_generation_tokens_sum{model="vllm_model",version="1"} 16
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="1"} 0
 ...
 vllm:request_generation_tokens_bucket{model="vllm_model",version="1",le="+Inf"} 1
-# HELP vllm:request_params_best_of Histogram of the best_of request parameter.
-# TYPE vllm:request_params_best_of histogram
-vllm:request_params_best_of_count{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_sum{model="vllm_model",version="1"} 1
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="1"} 1
-...
-vllm:request_params_best_of_bucket{model="vllm_model",version="1",le="+Inf"} 1
 # HELP vllm:request_params_n Histogram of the n request parameter.
 # TYPE vllm:request_params_n histogram
 vllm:request_params_n_count{model="vllm_model",version="1"} 1
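The README excerpt above is Prometheus exposition-format text: each histogram appears as a _count sample, a _sum sample, and a series of cumulative _bucket samples ending at le="+Inf". A minimal sketch of verifying the removal against a running server, assuming Triton's default metrics port 8002 and the requests library (both assumptions for illustration, not part of this PR):

import requests

# Scrape Triton's Prometheus endpoint (default metrics port 8002 is an assumption).
metrics_text = requests.get("http://localhost:8002/metrics").text

# After this change, the best_of histogram is gone; the n histogram remains.
assert "vllm:request_params_best_of" not in metrics_text
assert "vllm:request_params_n_count" in metrics_text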
ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py: 20 changes (3 additions, 17 deletions)
@@ -189,24 +189,10 @@ def test_custom_sampling_params(self):
             model_name=self.vllm_model_name,
         )
         metrics_dict = self.parse_vllm_metrics()
-        total_prompts = len(self.prompts)
 
-        # vllm:request_params_best_of
-        """
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_count"], total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_sum"], best_of * total_prompts
-        )
-        self.assertEqual(
-            metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
-        )
-        """
-        # vllm:request_params_n
-        self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
-        # self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
-        self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)
+        self.assertIn("vllm:request_params_n_count", metrics_dict)
+        self.assertIn("vllm:request_params_n_sum", metrics_dict)
+        self.assertIn("vllm:request_params_n_bucket", metrics_dict)
 
     def test_vllm_metrics_disabled(self):
         # Test vLLM metrics
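For context, parse_vllm_metrics is a helper defined elsewhere in this test file; the looser assertIn checks only require that it return a dict keyed by metric family name, without pinning exact counts that can vary across vLLM versions. A sketch of what such a helper might look like (the endpoint URL and parsing details are assumptions for illustration, not code from this PR):

import requests

def parse_vllm_metrics_sketch() -> dict:
    # Map each vllm:* metric family to its most recently seen sample value.
    # All _bucket samples of a family collapse onto one key here, so only
    # presence checks are reliable, matching the assertIn assertions above.
    metrics = {}
    for line in requests.get("http://localhost:8002/metrics").text.splitlines():
        if not line.startswith("vllm:"):
            continue  # skips "# HELP" and "# TYPE" comment lines
        name_with_labels, _, value = line.rpartition(" ")
        family = name_with_labels.split("{")[0]
        metrics[family] = float(value)
    return metrics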
src/model.py: 6 changes (2 additions, 4 deletions)
@@ -359,10 +359,8 @@ def _setup_metrics(self):
"version": self.args["model_version"],
}
# Add vLLM custom metrics
engine_config = self._llm_engine.engine.model_config
self._vllm_metrics = VllmStatLogger(
labels, engine_config.max_model_len, self.logger
)
vllm_config = self._llm_engine.engine.vllm_config
self._vllm_metrics = VllmStatLogger(labels, vllm_config, self.logger)
self._llm_engine.add_logger("triton", self._vllm_metrics)
except pb_utils.TritonModelException as e:
if "metrics not supported" in str(e):
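The logger now receives the engine's full VllmConfig rather than just max_model_len; the old value stays reachable as vllm_config.model_config.max_model_len (see the src/utils/metrics.py hunk below). A minimal sketch of the new wiring, assuming an initialized AsyncLLMEngine named llm_engine, a Triton logger object, and the backend's utils.metrics import path:

from utils.metrics import VllmStatLogger

labels = {"model": "vllm_model", "version": "1"}

# Hand the stat logger the whole engine config; it extracts what it needs.
vllm_config = llm_engine.engine.vllm_config
stat_logger = VllmStatLogger(labels, vllm_config, logger)
llm_engine.add_logger("triton", stat_logger)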
src/utils/metrics.py: 9 changes (6 additions, 3 deletions)
@@ -29,6 +29,7 @@
 from typing import Dict, List, Union
 
 import triton_python_backend_utils as pb_utils
+from vllm.config import VllmConfig
 from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
 from vllm.engine.metrics import Stats as VllmStats
 from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
@@ -163,11 +164,13 @@ def __init__(self, labels: List[str], max_model_len: int):
 class VllmStatLogger(VllmStatLoggerBase):
     """StatLogger is used as an adapter between vLLM stats collector and Triton metrics provider."""
 
-    def __init__(self, labels: Dict, max_model_len: int, log_logger) -> None:
+    def __init__(self, labels: Dict, vllm_config: VllmConfig, log_logger) -> None:
         # Tracked stats over current local logging interval.
         # local_interval not used here. It's for vLLM logs to stdout.
-        super().__init__(local_interval=0)
-        self.metrics = TritonMetrics(labels, max_model_len)
+        super().__init__(local_interval=0, vllm_config=vllm_config)
+        self.metrics = TritonMetrics(
+            labels=labels, max_model_len=vllm_config.model_config.max_model_len
+        )
         self.log_logger = log_logger
 
     # Starting the metrics thread. It allows vLLM to keep making progress
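This module also imports build_1_2_5_buckets from vllm.engine.metrics, which is presumably how max_model_len becomes the histogram bucket boundaries (le="1" through le="+Inf") shown in the README output. A sketch of that 1-2-5 bucketing scheme, reimplemented here for illustration only:

from typing import List

def build_1_2_5_buckets_sketch(max_value: int) -> List[int]:
    # Boundaries follow a 1-2-5 pattern (1, 2, 5, 10, 20, 50, ...) and stop
    # before exceeding max_value, e.g. 100 -> [1, 2, 5, 10, 20, 50, 100].
    mantissas = [1, 2, 5]
    exponent = 0
    buckets: List[int] = []
    while True:
        for m in mantissas:
            value = m * 10**exponent
            if value > max_value:
                return buckets
            buckets.append(value)
        exponent += 1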