Skip to content

Commit 7d52e4e

Browse files
Merge pull request #1101 from terrastackai/cp_fixes_vllm_plugins
Various fixes to vLLM plugins
2 parents 56acae5 + 907a19a commit 7d52e4e

File tree

5 files changed

+52
-13
lines changed

5 files changed

+52
-13
lines changed

integrationtests/vLLM/test_segmentation_io_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_serving_segmentation_plugin(get_server, model_name, input_name):
7777
# This is just in case the test ends up with a GPU of less memory than an A100-80GB.
7878
# Just to avoid OOMing in the CI
7979
"--max-num-seqs",
80-
"8",
80+
"32",
8181
"--io-processor-plugin",
8282
io_processor_plugin,
8383
"--model-impl",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ geobenchv2 = [
141141

142142
vllm = [
143143
"geobenchv2==0.9",
144-
"vllm>=0.12,<=0.14.0",
144+
"vllm>=0.12,!=0.15.*",
145145
]
146146

147147
vllm_test = [

terratorch/vllm/plugins/segmentation/segmentation_io_processor.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from einops import rearrange
2020
import logging
2121
from terratorch.vllm.plugins import generate_datamodule
22+
from terratorch.vllm.utils import check_vllm_version
2223
import uuid
2324
import warnings
2425
from vllm.config import VllmConfig
@@ -325,7 +326,7 @@ def pre_process(
325326
# Just run the async function from a synchronous context.
326327
# Since we are already in the vLLM server event loop we use that one.
327328
loop = asyncio.get_event_loop()
328-
loop.run_until_complete(self.pre_process_async(prompt, request_id, **kwargs))
329+
return loop.run_until_complete(self.pre_process_async(prompt, request_id, **kwargs))
329330

330331

331332
async def pre_process_async(
@@ -414,16 +415,23 @@ async def pre_process_async(
414415
window["image"] = window["image"][None, :, :, :]
415416
window = self.datamodule.aug(window)["image"]
416417

417-
prompt = {
418-
"prompt_token_ids": [1],
419-
"multi_modal_data": {
420-
"pixel_values": window.to(torch.float16)[0],
421-
}
418+
multi_modal_data = {
419+
"pixel_values": window.to(torch.float16)[0],
422420
}
423-
424421
# not all models use location coordinates, so we don't bother sending them to vLLM if not needed
425422
if "location_coords" in self.model_config["input"]["data"]:
426-
prompt["multi_modal_data"]["location_coords"] = location_coords
423+
multi_modal_data["location_coords"] = location_coords
424+
425+
# after v0.14.0 vLLM has changed the input structure for multimodal data
426+
if check_vllm_version("0.14.0", ">"):
427+
multi_modal_data = {
428+
"image": multi_modal_data
429+
}
430+
431+
prompt = {
432+
"prompt_token_ids": [1],
433+
"multi_modal_data": multi_modal_data
434+
}
427435

428436
prompts.append(prompt)
429437

terratorch/vllm/plugins/segmentation/terramind_segmentation_io_processor.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from terratorch.tasks.tiled_inference import generate_tiled_inference_output, prepare_tiled_inference_input
2323
from terratorch.vllm.plugins import generate_datamodule
2424
from terratorch.cli_tools import write_tiff
25+
from terratorch.vllm.utils import check_vllm_version
2526
from .utils import download_file_async, get_filename_from_url, path_or_tmpdir, to_base64_tiff
2627

2728
from .types import PluginConfig, RequestData, RequestOutput, TiledInferenceParameters
@@ -146,7 +147,7 @@ def pre_process(
146147
# Just run the async function from a synchronous context.
147148
# Since we are already in the vLLM server event loop we use that one.
148149
loop = asyncio.get_event_loop()
149-
loop.run_until_complete(self.pre_process_async(prompt, request_id, **kwargs))
150+
return loop.run_until_complete(self.pre_process_async(prompt, request_id, **kwargs))
150151

151152

152153
async def pre_process_async(
@@ -193,10 +194,17 @@ async def pre_process_async(
193194
for tile in prompt_data:
194195
reshaped_tile = tensor_reshape_fn(tile.input_data)
195196
# TODO: Check if there's a better way of getting the data in the correct data type out of the box.
196-
vllm_input = {mod: tensor.to(torch.float16) for mod, tensor in reshaped_tile.items()}
197+
multi_modal_data = {mod: tensor.to(torch.float16) for mod, tensor in reshaped_tile.items()}
198+
199+
# after v0.14.0 vLLM has changed the input structure for multimodal data
200+
if check_vllm_version("0.14.0", ">"):
201+
multi_modal_data = {
202+
"image": multi_modal_data
203+
}
204+
197205
prompt = {
198206
"prompt_token_ids": [1],
199-
"multi_modal_data": vllm_input
207+
"multi_modal_data": multi_modal_data
200208
}
201209

202210
prompts.append(prompt)

terratorch/vllm/utils.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,29 @@
99
from typing import List,Dict
1010
from enum import Enum
1111

12+
from packaging import version
13+
from vllm import __version__ as vllm_version
14+
15+
16+
def check_vllm_version(target_version: str, comparison: str):
    """Compare the installed vLLM version against a target version string.

    Args:
        target_version: Version to compare against, e.g. ``"0.14.0"``.
        comparison: Comparison operator as a string — one of
            ``"=="``, ``"!="``, ``"<"``, ``"<="``, ``">"``, ``">="``.

    Returns:
        bool: Result of ``installed_vllm_version <comparison> target_version``.

    Raises:
        ValueError: If ``comparison`` is not one of the supported operators.
    """
    # Dispatch table instead of an if/elif ladder; each entry applies the
    # corresponding comparison to two parsed Version objects.
    comparators = {
        "==": lambda a, b: a == b,
        "!=": lambda a, b: a != b,
        "<": lambda a, b: a < b,
        "<=": lambda a, b: a <= b,
        ">": lambda a, b: a > b,
        ">=": lambda a, b: a >= b,
    }
    if comparison not in comparators:
        raise ValueError(f"Invalid comparison operator: {comparison}")
    return comparators[comparison](
        version.parse(vllm_version), version.parse(target_version)
    )
34+
1235
class InputTypeEnum(str, Enum):
1336
tensor= 'torch.Tensor'
1437

0 commit comments

Comments
 (0)