    cached_tokenizer_from_config)
 from vllm.utils import ClassRegistry
 
-from .cache import (BaseMultiModalProcessorCache,
-                     processor_only_cache_from_config)
+from .cache import BaseMultiModalProcessorCache
 from .processing import BaseMultiModalProcessor, BaseProcessingInfo
 from .profiling import (BaseDummyInputsBuilder, DummyDecoderData,
                          DummyEncoderData, MultiModalProfiler)
@@ -176,35 +175,6 @@ def get_max_tokens_per_item_by_nonzero_modality(
             if mm_limits[key] > 0
         }
 
-    # TODO: Remove once V0 is gone
-    def get_max_tokens_by_modality(
-        self,
-        model_config: "ModelConfig",
-    ) -> Mapping[str, int]:
-        """
-        Get the maximum number of tokens from each modality
-        for profiling the memory usage of a model.
-        """
-        cache = processor_only_cache_from_config(model_config, self)
-        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache)
-        max_tokens_per_item = self.get_max_tokens_per_item_by_modality(
-            model_config,
-            cache=cache,
-        )
-
-        return {
-            key: mm_limits[key] * max_tokens_per_mm_item
-            for key, max_tokens_per_mm_item in max_tokens_per_item.items()
-        }
-
-    # TODO: Remove once V0 is gone
-    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int:
-        """
-        Get the maximum number of multi-modal tokens
-        for profiling the memory usage of a model.
-        """
-        return sum(self.get_max_tokens_by_modality(model_config).values())
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
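
For reference, callers that relied on the two deleted V0 helpers can recompute the same values from the registry methods that remain. A minimal sketch, assuming processor_only_cache_from_config is still importable from vllm.multimodal.cache and that the remaining registry signatures are unchanged; the helper names below are hypothetical, not part of the vLLM API:

from collections.abc import Mapping

# Assumption: only the registry-local import was dropped; the helper is still
# exported from vllm.multimodal.cache.
from vllm.multimodal.cache import processor_only_cache_from_config


def max_tokens_by_modality(registry, model_config) -> Mapping[str, int]:
    # Hypothetical helper mirroring the deleted get_max_tokens_by_modality().
    cache = processor_only_cache_from_config(model_config, registry)
    mm_limits = registry.get_mm_limits_per_prompt(model_config, cache=cache)
    max_tokens_per_item = registry.get_max_tokens_per_item_by_modality(
        model_config,
        cache=cache,
    )
    # Per-modality budget = per-item token count * allowed items per prompt.
    return {
        key: mm_limits[key] * per_item
        for key, per_item in max_tokens_per_item.items()
    }


def max_multimodal_tokens(registry, model_config) -> int:
    # Hypothetical helper mirroring the deleted get_max_multimodal_tokens().
    return sum(max_tokens_by_modality(registry, model_config).values())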