Remove vLLM 0.6.x version checks

kthui · kthui · commit 8b17341ae446 · 2024-12-05T12:37:57.000-08:00
diff --git a/src/model.py b/src/model.py
@@ -43,7 +43,6 @@
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
-from vllm.version import __version__ as _VLLM_VERSION
 
 from utils.metrics import VllmStatLogger
 
@@ -74,6 +73,12 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
         # Inputs expected by the backend.
         inputs = [
             {"name": "text_input", "data_type": "TYPE_STRING", "dims": [1]},
+            {
+                "name": "image",
+                "data_type": "TYPE_STRING",
+                "dims": [-1],  # can be multiple images as separate elements
+                "optional": True,
+            },
             {
                 "name": "stream",
                 "data_type": "TYPE_BOOL",
@@ -123,15 +128,6 @@ def _auto_complete_inputs_and_outputs(auto_complete_model_config):
                 "optional": True,
             },
         ]
-        if _VLLM_VERSION >= "0.6.3.post1":
-            inputs.append(
-                {
-                    "name": "image",
-                    "data_type": "TYPE_STRING",
-                    "dims": [-1],  # can be multiple images as separate elements
-                    "optional": True,
-                }
-            )
         # Outputs expected by the backend.
         outputs = [
             {"name": "text_output", "data_type": "TYPE_STRING", "dims": [-1]},
@@ -352,19 +348,18 @@ def _get_input_tensors(self, request):
             prompt = prompt.decode("utf-8")
 
         # image
-        if _VLLM_VERSION >= "0.6.3.post1":
-            images = pb_utils.get_input_tensor_by_name(request, "image")
-            if images:
-                images_vllm = []
-                for image_np in images.as_numpy():
-                    image_b = base64.b64decode(image_np.decode("utf-8"))
-                    image_rgb = Image.open(BytesIO(image_b)).convert("RGB")
-                    images_vllm.append(image_rgb)
-                if len(images_vllm) > 0:
-                    prompt = {
-                        "prompt": prompt,
-                        "multi_modal_data": {"image": images_vllm},
-                    }
+        images = pb_utils.get_input_tensor_by_name(request, "image")
+        if images:
+            images_vllm = []
+            for image_np in images.as_numpy():
+                image_b = base64.b64decode(image_np.decode("utf-8"))
+                image_rgb = Image.open(BytesIO(image_b)).convert("RGB")
+                images_vllm.append(image_rgb)
+            if len(images_vllm) > 0:
+                prompt = {
+                    "prompt": prompt,
+                    "multi_modal_data": {"image": images_vllm},
+                }
 
         # stream
         stream = pb_utils.get_input_tensor_by_name(request, "stream")
diff --git a/src/utils/metrics.py b/src/utils/metrics.py
@@ -32,7 +32,6 @@
 from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
 from vllm.engine.metrics import Stats as VllmStats
 from vllm.engine.metrics import SupportsMetricsInfo, build_1_2_5_buckets
-from vllm.version import __version__ as _VLLM_VERSION
 
 
 class TritonMetrics:
@@ -77,14 +76,6 @@ def __init__(self, labels: List[str], max_model_len: int):
             description="Number of generation tokens processed.",
             kind=pb_utils.MetricFamily.HISTOGRAM,
         )
-        # 'best_of' metric has been hidden since vllm 0.6.3
-        # https://github.com/vllm-project/vllm/commit/cbc2ef55292b2af6ff742095c030e8425124c005
-        if _VLLM_VERSION < "0.6.3":
-            self.histogram_best_of_request_family = pb_utils.MetricFamily(
-                name="vllm:request_params_best_of",
-                description="Histogram of the best_of request parameter.",
-                kind=pb_utils.MetricFamily.HISTOGRAM,
-            )
         self.histogram_n_request_family = pb_utils.MetricFamily(
             name="vllm:request_params_n",
             description="Histogram of the n request parameter.",
@@ -163,13 +154,6 @@ def __init__(self, labels: List[str], max_model_len: int):
                 buckets=build_1_2_5_buckets(max_model_len),
             )
         )
-        if _VLLM_VERSION < "0.6.3":
-            self.histogram_best_of_request = (
-                self.histogram_best_of_request_family.Metric(
-                    labels=labels,
-                    buckets=[1, 2, 5, 10, 20],
-                )
-            )
         self.histogram_n_request = self.histogram_n_request_family.Metric(
             labels=labels,
             buckets=[1, 2, 5, 10, 20],
@@ -256,10 +240,6 @@ def log(self, stats: VllmStats) -> None:
             ),
             (self.metrics.histogram_n_request, stats.n_requests),
         ]
-        if _VLLM_VERSION < "0.6.3":
-            histogram_metrics.append(
-                (self.metrics.histogram_best_of_request, stats.best_of_requests)
-            )
         for metric, data in counter_metrics:
             self._log_counter(metric, data)
         for metric, data in histogram_metrics: