refactor: align multimodal example port allocation with vLLM components

nnshah1 · claude · nnshah1 · commit 3992615983c5 · 2025-11-06T10:54:37.000-08:00
Replace dynamic port allocation with environment variable-based configuration to match the approach used in components/src/dynamo/vllm/args.py.

Changes:
- Remove runtime.allocate_port_block() in favor of the DYN_VLLM_KV_EVENT_PORT env var
- Add port validation against the registered port range (1024-49151)
- Replace set_side_channel_host_and_port() with ensure_side_channel_host()
- Update configure_ports() to be synchronous and environment-based
- Add create_kv_events_config() function matching the vLLM components pattern
- Remove side_channel_port from the Config class
- Simplify overwrite_args() to follow the vLLM components structure

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/examples/multimodal/components/worker.py b/examples/multimodal/components/worker.py
@@ -418,7 +418,7 @@ def signal_handler():
     args, config = VllmBaseWorker.parse_args()
 
     # vLLM config overwrites
-    await configure_ports(runtime, config)
+    configure_ports(config)
     overwrite_args(config)
     await init(runtime, args, config)
 
diff --git a/examples/multimodal/utils/args.py b/examples/multimodal/utils/args.py
@@ -2,20 +2,16 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
-import json
 import logging
 import os
 import socket
 import sys
-import time
 from typing import Callable, List, Optional, Tuple
 
 from vllm.config import KVTransferConfig
 from vllm.distributed.kv_events import KVEventsConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 
-from dynamo.runtime import DistributedRuntime
-
 logger = logging.getLogger(__name__)
 
 DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
@@ -30,7 +26,6 @@ class Config:
     component: str
     endpoint: str
     kv_port: Optional[int] = None
-    side_channel_port: Optional[int] = None
 
     # mirror vLLM
     model: str
@@ -115,98 +110,167 @@ def base_parse_args(
     return args, config
 
 
-async def allocate_and_reserve_port(
-    runtime: DistributedRuntime,
-    namespace: str,
-    worker_id: str,
-    reason: str,
-) -> int:
-    """
-    Get an OS-assigned port and atomically reserve it.
-    Retries until successful or internal max attempts reached.
+# IANA registered ("user") port range; _resolve_port() rejects ports outside it
+REGISTERED_PORT_MIN = 1024
+REGISTERED_PORT_MAX = 49151
+
+
+def _resolve_port(env_var: str, default_port: int) -> int:
     """
+    Resolve port from environment variable with validation.
 
-    context_json = {
-        "worker_id": worker_id,
-        "reason": reason,
-        "reserved_at": time.time(),
-        "pid": os.getpid(),
-        "block_size": 1,
-    }
+    Args:
+        env_var: Environment variable name
+        default_port: Default port if env var not set
+
+    Returns:
+        Validated port number
+
+    Raises:
+        ValueError: If port is invalid or out of range
+    """
+    env_value = os.getenv(env_var)
+    if env_value is None:
+        port = default_port
+    else:
+        try:
+            port = int(env_value)
+        except ValueError as exc:
+            raise ValueError(
+                f"{env_var} must be an integer port number, got {env_value!r}."
+            ) from exc
+
+    if not (REGISTERED_PORT_MIN <= port <= REGISTERED_PORT_MAX):
+        raise ValueError(
+            f"{env_var} port {port} is outside of the registered port range "
+            f"({REGISTERED_PORT_MIN}-{REGISTERED_PORT_MAX})."
+        )
 
-    # Any ephemeral port, equivalent to binding port 0
-    port_range_min = 32_768
-    port_range_max = 60_999
-    allocated_ports = await runtime.allocate_port_block(
-        namespace,
-        port_range_min,
-        port_range_max,
-        1,  # how many ports to allocate
-        json.dumps(context_json),
-    )
-    if not allocated_ports:
-        raise RuntimeError("allocate_port_block returned no ports")
-    port = allocated_ports[0]
-    logger.debug(f"Reserved OS-assigned port {port} for {worker_id}")
     return port
 
 
-async def configure_ports(runtime: DistributedRuntime, config: Config):
-    """Configure including port allocation and vLLM overrides."""
+# Environment variable configuration: each value is a zero-argument callable evaluated on lookup
+environment_variables = {
+    # Port used for KV events publishing to the frontend
+    # Note: This env variable is ignored if explicitly using --kv-events-config ''
+    "DYN_VLLM_KV_EVENT_PORT": lambda: _resolve_port("DYN_VLLM_KV_EVENT_PORT", 20080),
+}
 
-    # First, allocate ports
-    dp_rank = config.engine_args.data_parallel_rank or 0
-    worker_id = f"vllm-{config.component}-dp{dp_rank}"
-
-    # Allocate KV events port
-    kv_port = await allocate_and_reserve_port(
-        runtime=runtime,
-        namespace=config.namespace,
-        worker_id=f"{worker_id}",
-        reason="zmq_kv_event_port",
-    )
 
-    # Allocate side channel port
-    side_channel_port = await allocate_and_reserve_port(
-        runtime=runtime,
-        namespace=config.namespace,
-        worker_id=f"{worker_id}",
-        reason="nixl_side_channel_port",
-    )
+def __getattr__(name: str):
+    """Resolve ``environment_variables`` entries lazily on module attribute access.
+    Only ``module.NAME`` lookups reach this hook; ``globals()["NAME"]`` does not.
+    """
+    if name in environment_variables:
+        return environment_variables[name]()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def get_host_ip() -> str:
+    """Get a bindable IPv4 address for side-channel coordination (127.0.0.1 on failure)."""
+    try:
+        host_name = socket.gethostname()
+    except socket.error as exc:
+        logger.warning("Failed to get hostname: %s, falling back to 127.0.0.1", exc)
+        return "127.0.0.1"
+
+    try:
+        host_ip = socket.gethostbyname(host_name)
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as test_socket:
+            test_socket.bind((host_ip, 0))
+        return host_ip
+    except socket.gaierror as exc:
+        logger.warning(
+            "Hostname %s cannot be resolved: %s, falling back to 127.0.0.1",
+            host_name,
+            exc,
+        )
+        return "127.0.0.1"
+    except socket.error as exc:
+        logger.warning(
+            "Hostname %s is not usable for binding: %s, falling back to 127.0.0.1",
+            host_name,
+            exc,
+        )
+        return "127.0.0.1"
 
-    # Update config with allocated ports
-    config.kv_port = kv_port
-    config.side_channel_port = side_channel_port
 
+def ensure_side_channel_host():
+    """Ensure the NIXL side-channel host is available without overriding user settings."""
 
-def overwrite_args(config):
-    """Set vLLM defaults for Dynamo."""
-    assert config.kv_port is not None, "Must set the kv_port, use configure_ports"
-    assert (
-        config.side_channel_port is not None
-    ), "Must set the side_channel_port, use configure_ports"
+    existing_host = os.getenv("VLLM_NIXL_SIDE_CHANNEL_HOST")
+    if existing_host:
+        logger.debug(
+            "Preserving existing VLLM_NIXL_SIDE_CHANNEL_HOST=%s", existing_host
+        )
+        return
+
+    host_ip = get_host_ip()
+    os.environ["VLLM_NIXL_SIDE_CHANNEL_HOST"] = host_ip
+    logger.debug("Set VLLM_NIXL_SIDE_CHANNEL_HOST to %s", host_ip)
+
+
+def configure_ports(config: Config):
+    """Resolve the KV event port from the environment and ensure the NIXL side-channel host."""
+    if config.engine_args.enable_prefix_caching:
+        # Call the resolver directly: module __getattr__ is not consulted by globals()[...]
+        config.kv_port = environment_variables["DYN_VLLM_KV_EVENT_PORT"]()
 
+    # Always ensure side channel host for NIXL connector
+    ensure_side_channel_host()
+
+
+def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
+    """Build the default zmq KVEventsConfig for prefix caching.
+    Returns None if prefix caching is off or the user set kv_events_config.
+    """
+    if not config.engine_args.enable_prefix_caching:
+        return None
+
+    # A user-provided config wins: return None so no Dynamo default replaces it
+    if user_config := getattr(config.engine_args, "kv_events_config", None):
+        logger.info("Using user-provided kv_events_config %s", user_config)
+        return None
+
+    if config.kv_port is None:
+        raise ValueError(
+            "config.kv_port is not set; call configure_ports(...) before overwrite_args "
+            "or provide --kv-events-config to supply an explicit endpoint."
+        )
     dp_rank = config.engine_args.data_parallel_rank or 0
 
+    return KVEventsConfig(
+        enable_kv_cache_events=True,
+        publisher="zmq",
+        endpoint=f"tcp://*:{config.kv_port - dp_rank}",  # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
+    )
+
+
+def overwrite_args(config):
+    """Set vLLM defaults for Dynamo."""
     defaults = {
         "task": "generate",
+        # As of vLLM >=0.10.0 the engine unconditionally calls
+        # `sampling_params.update_from_tokenizer(...)`, so we can no longer
+        # skip tokenizer initialisation.  Setting this to **False** avoids
+        # a NoneType error when the processor accesses the tokenizer.
         "skip_tokenizer_init": False,
         "disable_log_requests": True,
-        "enable_prefix_caching": True,
-        # KV routing relies on logging KV metrics
         "disable_log_stats": False,
-        # Always setting up kv transfer for disagg
-        "kv_transfer_config": KVTransferConfig(
-            kv_connector="NixlConnector", kv_role="kv_both"
-        ),
-        "kv_events_config": KVEventsConfig(
-            enable_kv_cache_events=True,
-            publisher="zmq",
-            endpoint=f"tcp://*:{config.kv_port - dp_rank}",  # vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
-        ),
     }
 
-    set_side_channel_host_and_port(config)
+    # Set KV transfer config for NIXL connector
+    defaults["kv_transfer_config"] = KVTransferConfig(
+        kv_connector="NixlConnector", kv_role="kv_both"
+    )
+
+    kv_events_config = create_kv_events_config(config)
+    if kv_events_config:
+        # Log only when a Dynamo default is actually installed (None means no default)
+        logger.info(
+            f"Using Dynamo default kv_events_config for publishing kv events over zmq: {kv_events_config}"
+        )
+        defaults["kv_events_config"] = kv_events_config
 
     logger.debug("Setting Dynamo defaults for vLLM")
     for key, value in defaults.items():
@@ -217,23 +281,3 @@ def overwrite_args(config):
             raise ValueError(f"{key} not found in AsyncEngineArgs from vLLM.")
 
 
-def set_side_channel_host_and_port(config: Config, hostname: Optional[str] = None):
-    """vLLM V1 NixlConnector creates a side channel to exchange metadata with other NIXL connectors.
-    This sets the port number for the side channel.
-    """
-    if hostname is None:
-        hostname = socket.gethostname()
-        # Test if hostname is usable by attempting to bind to it
-        try:
-            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as test_socket:
-                test_socket.bind((hostname, 0))
-        except (socket.error, socket.gaierror):
-            # If hostname is not usable, fall back to localhost
-            logger.warning(
-                f"Hostname '{hostname}' is not usable, falling back to '127.0.0.1'"
-            )
-            hostname = "127.0.0.1"
-
-    os.environ["VLLM_NIXL_SIDE_CHANNEL_HOST"] = hostname
-    os.environ["VLLM_NIXL_SIDE_CHANNEL_PORT"] = str(config.side_channel_port)
-    logger.debug(f"Set NIXL side channel to {hostname}:{config.side_channel_port}")