4 changes: 2 additions & 2 deletions vllm_omni/benchmarks/metrics/metrics.py
@@ -223,8 +223,8 @@ def calculate_metrics(
             good_completed += 1
 
     if completed == 0:
-        warnings.formatwarning = (
-            lambda msg, category, filename, lineno, line=None: f"{filename}:{lineno}: {category.__name__}: {msg}\n"
+        warnings.formatwarning = lambda msg, category, filename, lineno, line=None: (
+            f"{filename}:{lineno}: {category.__name__}: {msg}\n"
         )
         warnings.warn(
             "All requests failed. This is likely due to a misconfiguration on the benchmark arguments.",
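A minimal standalone sketch of the formatwarning override above (the reformat is behavior-preserving; the output shape is read directly off the lambda):

import warnings

# Warnings now print as "<filename>:<lineno>: <Category>: <message>"
# instead of Python's default multi-line format.
warnings.formatwarning = lambda msg, category, filename, lineno, line=None: (
    f"{filename}:{lineno}: {category.__name__}: {msg}\n"
)

warnings.warn("All requests failed.")
# prints e.g.: /path/to/metrics.py:9: UserWarning: All requests failed.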
2 changes: 1 addition & 1 deletion vllm_omni/diffusion/envs.py
@@ -20,7 +20,7 @@
     # used in distributed environment to determine the master address
     "MASTER_ADDR": lambda: os.getenv("MASTER_ADDR", ""),
     # used in distributed environment to manually set the communication port
-    "MASTER_PORT": lambda: (int(os.getenv("MASTER_PORT", "0")) if "MASTER_PORT" in os.environ else None),
+    "MASTER_PORT": lambda: int(os.getenv("MASTER_PORT", "0")) if "MASTER_PORT" in os.environ else None,
     # path to cudatoolkit home directory, under which should be bin, include,
     # and lib directories.
     "CUDA_HOME": lambda: os.environ.get("CUDA_HOME", None),
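A quick behavior sketch for the MASTER_PORT accessor (the reformat only drops redundant parentheses; the semantics below are read off the expression itself):

import os

# Same expression as in the diff above, bound to a name for testing.
master_port = lambda: int(os.getenv("MASTER_PORT", "0")) if "MASTER_PORT" in os.environ else None

os.environ.pop("MASTER_PORT", None)
assert master_port() is None      # unset -> None, so callers can detect "not configured"

os.environ["MASTER_PORT"] = "29500"
assert master_port() == 29500     # set -> parsed int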
653 changes: 109 additions & 544 deletions vllm_omni/entrypoints/openai/api_server.py

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions vllm_omni/entrypoints/openai/diffusion_models.py
@@ -0,0 +1,37 @@
from vllm.entrypoints.openai.engine.protocol import (
    ModelCard,
    ModelList,
    ModelPermission,
)
Collaborator comment:
Good: Module extraction

Moving DiffusionServingModels to its own module improves organization. The change from _DiffusionServingModels (private) to DiffusionServingModels (public) and from _base_model_paths to base_model_paths makes it properly accessible.

from vllm.entrypoints.openai.models.protocol import BaseModelPath


class DiffusionServingModels:
    """Minimal OpenAIServingModels implementation for diffusion-only servers.

    vLLM's /v1/models route expects `app.state.openai_serving_models` to expose
    `show_available_models()`. In pure diffusion mode we don't initialize the
    full OpenAIServingModels (it depends on LLM-specific processors), so we
    provide a lightweight fallback.
    """

    def __init__(self, base_model_paths: list[BaseModelPath]) -> None:
        self.base_model_paths = base_model_paths

    async def show_available_models(self) -> ModelList:
        return ModelList(
            data=[
                ModelCard(
                    id=base_model.name,
                    root=base_model.model_path,
                    permission=[ModelPermission()],
                )
                for base_model in self.base_model_paths
            ]
        )

    @property
    def model_name(self) -> str:
        if not self.base_model_paths:
            raise ValueError("No base models are configured; cannot determine model_name.")
        return self.base_model_paths[0].name
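A hypothetical wiring sketch showing how a diffusion-only server could attach this class so vLLM's /v1/models route finds it; the FastAPI setup, model name, and path below are illustrative assumptions, not code from this PR:

from fastapi import FastAPI

from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm_omni.entrypoints.openai.diffusion_models import DiffusionServingModels

app = FastAPI()

# /v1/models calls app.state.openai_serving_models.show_available_models().
app.state.openai_serving_models = DiffusionServingModels(
    base_model_paths=[
        # hypothetical model registration
        BaseModelPath(name="qwen-image", model_path="/models/qwen-image"),
    ]
)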
4 changes: 4 additions & 0 deletions vllm_omni/entrypoints/openai/protocol/__init__.py
@@ -4,6 +4,8 @@
 from vllm_omni.entrypoints.openai.protocol.chat_completion import OmniChatCompletionStreamResponse
 from vllm_omni.entrypoints.openai.protocol.images import (
     ImageData,
+    ImageEditRequest,
+    ImageEditResponse,
     ImageGenerationRequest,
     ImageGenerationResponse,
     ResponseFormat,
@@ -19,6 +21,8 @@
     "ImageData",
     "ImageGenerationRequest",
     "ImageGenerationResponse",
+    "ImageEditRequest",
+    "ImageEditResponse",
     "ResponseFormat",
     "VideoData",
     "VideoGenerationRequest",
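With these re-exports in place, the edit protocol types resolve from the package root; a one-line usage sketch:

from vllm_omni.entrypoints.openai.protocol import ImageEditRequest, ImageEditResponse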
94 changes: 91 additions & 3 deletions vllm_omni/entrypoints/openai/protocol/images.py
@@ -10,7 +10,8 @@
 from enum import Enum
 from typing import Any
 
-from pydantic import BaseModel, Field, field_validator
+from fastapi import UploadFile
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 
 class ResponseFormat(str, Enum):
@@ -127,5 +128,92 @@ class ImageGenerationResponse(BaseModel):
 
     created: int = Field(..., description="Unix timestamp of when the generation completed")
     data: list[ImageData] = Field(..., description="Array of generated images")
-    output_format: str = Field(None, description="The output format of the image generation")
-    size: str = Field(None, description="The size of the image generated")
+    output_format: str | None = None
+    size: str | None = None
+
+
+class ImageEditResponse(BaseModel):
+    """
+    OpenAI DALL-E compatible image edit response.
+
+    Returns generated images with metadata.
+    """
+
+    created: int = Field(..., description="Unix timestamp of when the generation completed")
+    data: list[ImageData] = Field(..., description="Array of generated images")
+    output_format: str = Field(..., description="The output format of the image generation")
+    size: str = Field(..., description="The size of the image generated")
+
+
+class ImageEditRequest(BaseModel):
+    image: list[UploadFile] | None = Field(default=None, description="Image file to edit")
+    url: list[str] | None = Field(default=None, description="URL of the image to edit")
+    prompt: str = Field(..., description="Text description of the desired image edit")
+    model: str | None = Field(
+        default=None,
+        description="Model to use (optional, uses server's configured model if omitted)",
+    )
+    n: int = Field(default=1, ge=1, le=10, description="Number of images to generate")
+    size: str | None = Field(
+        default=None,
+        description="Image dimensions in WIDTHxHEIGHT format (e.g., '1024x1024', uses model defaults if omitted)",
+    )
+    output_format: str = Field(
+        default="png", description="The output format of the image generation (e.g., 'png', 'jpeg')"
+    )
+    background: str | None = Field(
+        default=None,
+        description=(
+            "Background color for transparent areas in the edited image (e.g., '#FFFFFF' for white). "
+            "If not specified, transparent areas will remain transparent in formats that support it (e.g., PNG)."
+        ),
+    )
+    response_format: ResponseFormat = Field(default=ResponseFormat.B64_JSON, description="Format of the returned image")
+    user: str | None = Field(default=None, description="User identifier for tracking")
+
+    # vllm-omni extensions for diffusion control
+    negative_prompt: str | None = Field(default=None, description="Text describing what to avoid in the image")
+    num_inference_steps: int | None = Field(
+        default=None,
+        ge=1,
+        le=200,
+        description="Number of diffusion sampling steps (uses model defaults if not specified)",
+    )
+    guidance_scale: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=20.0,
+        description="Classifier-free guidance scale (uses model defaults if not specified)",
+    )
+    true_cfg_scale: float | None = Field(
+        default=None,
+        ge=0.0,
+        le=20.0,
+        description="True CFG scale (model-specific parameter, may be ignored if not supported)",
+    )
+    seed: int | None = Field(default=None, description="Random seed for reproducibility")
+    generator_device: str | None = Field(
+        default=None,
+        description="Device for the seeded torch.Generator (e.g. 'cpu', 'cuda'). Defaults to the runner's device.",
+    )
+    output_compression: int = Field(
+        default=100,
+        ge=0,
+        le=100,
+        description=(
+            "Output compression/quality level for edited images (0-100). "
+            "Higher values preserve higher quality (typically larger files), lower values apply stronger compression. "
+            "Defaults to 100."
+        ),
+    )
+    lora: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Optional LoRA adapter for this request. Expected shape: "
+            "{name/path/scale/int_id}. Field names are flexible "
+            "(e.g. name|lora_name|adapter, path|lora_path|local_path, "
+            "scale|lora_scale, int_id|lora_int_id)."
+        ),
+    )
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
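A hypothetical client-side sketch of an edit request matching this schema; the endpoint URL, route, and file name are assumptions (this PR adds the protocol types, not the route wiring shown here):

import requests

resp = requests.post(
    "http://localhost:8000/v1/images/edits",   # assumed route
    files={"image": open("input.png", "rb")},  # maps to ImageEditRequest.image
    data={
        "prompt": "replace the sky with a sunset",
        "n": 1,
        "size": "1024x1024",
        "output_format": "png",
        "response_format": "b64_json",
        # vllm-omni diffusion extensions:
        "num_inference_steps": 30,
        "guidance_scale": 7.5,
        "seed": 42,
    },
)
body = resp.json()  # shaped like ImageEditResponse: created, data, output_format, size
print(body["created"], len(body["data"]))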