|
32 | 32 | from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase |
33 | 33 | from vllm.engine.metrics import Stats as VllmStats |
34 | 34 | from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets |
35 | | -from vllm.version import __version__ as _VLLM_VERSION |
36 | 35 |
|
37 | 36 |
|
38 | 37 | class TritonMetrics: |
@@ -77,14 +76,6 @@ def __init__(self, labels: List[str], max_model_len: int): |
77 | 76 | description="Number of generation tokens processed.", |
78 | 77 | kind=pb_utils.MetricFamily.HISTOGRAM, |
79 | 78 | ) |
80 | | - # 'best_of' metric has been hidden since vllm 0.6.3 |
81 | | - # https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005 |
82 | | - if _VLLM_VERSION < "0.6.3": |
83 | | - self.histogram_best_of_request_family = pb_utils.MetricFamily( |
84 | | - name="vllm:request_params_best_of", |
85 | | - description="Histogram of the best_of request parameter.", |
86 | | - kind=pb_utils.MetricFamily.HISTOGRAM, |
87 | | - ) |
88 | 79 | self.histogram_n_request_family = pb_utils.MetricFamily( |
89 | 80 | name="vllm:request_params_n", |
90 | 81 | description="Histogram of the n request parameter.", |
@@ -163,13 +154,6 @@ def __init__(self, labels: List[str], max_model_len: int): |
163 | 154 | buckets=build_1_2_5_buckets(max_model_len), |
164 | 155 | ) |
165 | 156 | ) |
166 | | - if _VLLM_VERSION < "0.6.3": |
167 | | - self.histogram_best_of_request = ( |
168 | | - self.histogram_best_of_request_family.Metric( |
169 | | - labels=labels, |
170 | | - buckets=[1, 2, 5, 10, 20], |
171 | | - ) |
172 | | - ) |
173 | 157 | self.histogram_n_request = self.histogram_n_request_family.Metric( |
174 | 158 | labels=labels, |
175 | 159 | buckets=[1, 2, 5, 10, 20], |
@@ -256,10 +240,6 @@ def log(self, stats: VllmStats) -> None: |
256 | 240 | ), |
257 | 241 | (self.metrics.histogram_n_request, stats.n_requests), |
258 | 242 | ] |
259 | | - if _VLLM_VERSION < "0.6.3": |
260 | | - histogram_metrics.append( |
261 | | - (self.metrics.histogram_best_of_request, stats.best_of_requests) |
262 | | - ) |
263 | 243 | for metric, data in counter_metrics: |
264 | 244 | self._log_counter(metric, data) |
265 | 245 | for metric, data in histogram_metrics: |
|
0 commit comments