|
32 | 32 | from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase |
33 | 33 | from vllm.engine.metrics import Stats as VllmStats |
34 | 34 | from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets |
35 | | - |
| 35 | +from vllm.version import __version__ as _VLLM_VERSION |
36 | 36 |
|
37 | 37 | class TritonMetrics: |
38 | 38 | def __init__(self, labels: List[str], max_model_len: int): |
@@ -76,6 +76,14 @@ def __init__(self, labels: List[str], max_model_len: int): |
76 | 76 | description="Number of generation tokens processed.", |
77 | 77 | kind=pb_utils.MetricFamily.HISTOGRAM, |
78 | 78 | ) |
| 79 | + # 'best_of' metric has been hidden since vllm 0.6.3 |
| 80 | + # https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005 |
| 81 | + if _VLLM_VERSION < "0.6.3":  # NOTE(review): str comparison is lexicographic, so e.g. "0.10.0" < "0.6.3" is True - confirm behaviour on vllm >= 0.6.10 / 0.10 |
| 82 | + self.histogram_best_of_request_family = pb_utils.MetricFamily( |
| 83 | + name="vllm:request_params_best_of", |
| 84 | + description="Histogram of the best_of request parameter.", |
| 85 | + kind=pb_utils.MetricFamily.HISTOGRAM, |
| 86 | + ) |
79 | 87 | self.histogram_n_request_family = pb_utils.MetricFamily( |
80 | 88 | name="vllm:request_params_n", |
81 | 89 | description="Histogram of the n request parameter.", |
@@ -154,6 +162,11 @@ def __init__(self, labels: List[str], max_model_len: int): |
154 | 162 | buckets=build_1_2_5_buckets(max_model_len), |
155 | 163 | ) |
156 | 164 | ) |
| 165 | + if hasattr(self, "histogram_best_of_request_family"):  # create the metric only when the best_of family was registered earlier in __init__ |
| 166 | + self.histogram_best_of_request = self.histogram_best_of_request_family.Metric( |
| 167 | + labels=labels, |
| 168 | + buckets=[1, 2, 5, 10, 20], |
| 169 | + ) |
157 | 170 | self.histogram_n_request = self.histogram_n_request_family.Metric( |
158 | 171 | labels=labels, |
159 | 172 | buckets=[1, 2, 5, 10, 20], |
@@ -240,7 +253,8 @@ def log(self, stats: VllmStats) -> None: |
240 | 253 | ), |
241 | 254 | (self.metrics.histogram_n_request, stats.n_requests), |
242 | 255 | ] |
243 | | - |
| 256 | + if hasattr(self.metrics, "histogram_best_of_request") and hasattr(stats, "best_of_requests"):  # feature-detect instead of comparing version strings lexicographically |
| 257 | + histogram_metrics.append((self.metrics.histogram_best_of_request, stats.best_of_requests)) |
244 | 258 | for metric, data in counter_metrics: |
245 | 259 | self._log_counter(metric, data) |
246 | 260 | for metric, data in histogram_metrics: |
|
0 commit comments