Skip to content

Commit 8b17341

Browse files
committed
Remove vLLM 0.6.x version checks
1 parent 0b9c8e2 commit 8b17341

File tree

2 files changed

+18
-43
lines changed

2 files changed

+18
-43
lines changed

src/model.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
from vllm.lora.request import LoRARequest
4444
from vllm.sampling_params import SamplingParams
4545
from vllm.utils import random_uuid
46-
from vllm.version import __version__ as _VLLM_VERSION
4746

4847
from utils.metrics import VllmStatLogger
4948

@@ -74,6 +73,12 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
7473
# Inputs expected by the backend.
7574
inputs = [
7675
{"name": "text_input", "data_type": "TYPE_STRING", "dims": [1]},
76+
{
77+
"name": "image",
78+
"data_type": "TYPE_STRING",
79+
"dims": [-1], # can be multiple images as separate elements
80+
"optional": True,
81+
},
7782
{
7883
"name": "stream",
7984
"data_type": "TYPE_BOOL",
@@ -123,15 +128,6 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
123128
"optional": True,
124129
},
125130
]
126-
if _VLLM_VERSION >= "0.6.3.post1":
127-
inputs.append(
128-
{
129-
"name": "image",
130-
"data_type": "TYPE_STRING",
131-
"dims": [-1], # can be multiple images as separate elements
132-
"optional": True,
133-
}
134-
)
135131
# Outputs expected by the backend.
136132
outputs = [
137133
{"name": "text_output", "data_type": "TYPE_STRING", "dims": [-1]},
@@ -352,19 +348,18 @@ def _get_input_tensors(self, request):
352348
prompt = prompt.decode("utf-8")
353349

354350
# image
355-
if _VLLM_VERSION >= "0.6.3.post1":
356-
images = pb_utils.get_input_tensor_by_name(request, "image")
357-
if images:
358-
images_vllm = []
359-
for image_np in images.as_numpy():
360-
image_b = base64.b64decode(image_np.decode("utf-8"))
361-
image_rgb = Image.open(BytesIO(image_b)).convert("RGB")
362-
images_vllm.append(image_rgb)
363-
if len(images_vllm) > 0:
364-
prompt = {
365-
"prompt": prompt,
366-
"multi_modal_data": {"image": images_vllm},
367-
}
351+
images = pb_utils.get_input_tensor_by_name(request, "image")
352+
if images:
353+
images_vllm = []
354+
for image_np in images.as_numpy():
355+
image_b = base64.b64decode(image_np.decode("utf-8"))
356+
image_rgb = Image.open(BytesIO(image_b)).convert("RGB")
357+
images_vllm.append(image_rgb)
358+
if len(images_vllm) > 0:
359+
prompt = {
360+
"prompt": prompt,
361+
"multi_modal_data": {"image": images_vllm},
362+
}
368363

369364
# stream
370365
stream = pb_utils.get_input_tensor_by_name(request, "stream")

src/utils/metrics.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
3333
from vllm.engine.metrics import Stats as VllmStats
3434
from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
35-
from vllm.version import __version__ as _VLLM_VERSION
3635

3736

3837
class TritonMetrics:
@@ -77,14 +76,6 @@ def __init__(self, labels: List[str], max_model_len: int):
7776
description="Number of generation tokens processed.",
7877
kind=pb_utils.MetricFamily.HISTOGRAM,
7978
)
80-
# 'best_of' metric has been hidden since vllm 0.6.3
81-
# https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005
82-
if _VLLM_VERSION < "0.6.3":
83-
self.histogram_best_of_request_family = pb_utils.MetricFamily(
84-
name="vllm:request_params_best_of",
85-
description="Histogram of the best_of request parameter.",
86-
kind=pb_utils.MetricFamily.HISTOGRAM,
87-
)
8879
self.histogram_n_request_family = pb_utils.MetricFamily(
8980
name="vllm:request_params_n",
9081
description="Histogram of the n request parameter.",
@@ -163,13 +154,6 @@ def __init__(self, labels: List[str], max_model_len: int):
163154
buckets=build_1_2_5_buckets(max_model_len),
164155
)
165156
)
166-
if _VLLM_VERSION < "0.6.3":
167-
self.histogram_best_of_request = (
168-
self.histogram_best_of_request_family.Metric(
169-
labels=labels,
170-
buckets=[1, 2, 5, 10, 20],
171-
)
172-
)
173157
self.histogram_n_request = self.histogram_n_request_family.Metric(
174158
labels=labels,
175159
buckets=[1, 2, 5, 10, 20],
@@ -256,10 +240,6 @@ def log(self, stats: VllmStats) -> None:
256240
),
257241
(self.metrics.histogram_n_request, stats.n_requests),
258242
]
259-
if _VLLM_VERSION < "0.6.3":
260-
histogram_metrics.append(
261-
(self.metrics.histogram_best_of_request, stats.best_of_requests)
262-
)
263243
for metric, data in counter_metrics:
264244
self._log_counter(metric, data)
265245
for metric, data in histogram_metrics:

0 commit comments

Comments (0)