
Commit 5fcbaa9

Add partial text-to-video code (untested)

1 parent 396de4a commit 5fcbaa9

File tree

5 files changed: +115 -7 lines changed

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 7 additions & 3 deletions
@@ -5,6 +5,7 @@
 from abc import ABC, abstractmethod
 from fastapi import BackgroundTasks, Request, FastAPI
 from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.staticfiles import StaticFiles
 from fastchat.utils import SEQUENCE_LENGTH_KEYS
 from loguru import logger
 import os
@@ -16,11 +17,12 @@
     AutoConfig,
 )
 import uuid
-from gpt_server.utils import get_free_tcp_port
+from gpt_server.utils import get_free_tcp_port, STATIC_DIR, local_ip
 from gpt_server.model_worker.base.base_model_worker import BaseModelWorker
 
 worker = None
 app = FastAPI()
+app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 
 
 def get_context_length_(config):
@@ -263,6 +265,8 @@ def run(cls):
         controller_address = args.controller_address
 
         port = get_free_tcp_port()
+        os.environ["WORKER_PORT"] = str(port)
+        os.environ["WORKER_HOST"] = str(local_ip)
         worker_addr = f"http://{host}:{port}"
 
         @app.on_event("startup")
@@ -409,9 +413,9 @@ async def api_get_embeddings(request: Request):
     params = await request.json()
     await acquire_worker_semaphore()
     logger.debug(f"params {params}")
-    embedding = await worker.get_image_output(params)
+    result = await worker.get_image_output(params)
     release_worker_semaphore()
-    return JSONResponse(content=embedding)
+    return JSONResponse(content=result)
 
 
 @app.post("/worker_get_classify")

gpt_server/model_worker/wan.py

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+import asyncio
+
+import io
+import os
+from typing import List
+import uuid
+from loguru import logger
+import shortuuid
+from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
+from gpt_server.model_worker.utils import pil_to_base64
+from gpt_server.utils import STATIC_DIR
+import torch
+from diffusers import AutoencoderKLWan, WanPipeline
+from diffusers.utils import export_to_video
+
+root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+
+
+class WanWorker(ModelWorkerBase):
+    def __init__(
+        self,
+        controller_addr: str,
+        worker_addr: str,
+        worker_id: str,
+        model_path: str,
+        model_names: List[str],
+        limit_worker_concurrency: int,
+        conv_template: str = None,  # type: ignore
+    ):
+        super().__init__(
+            controller_addr,
+            worker_addr,
+            worker_id,
+            model_path,
+            model_names,
+            limit_worker_concurrency,
+            conv_template,
+            model_type="image",
+        )
+        backend = os.environ["backend"]
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        vae = AutoencoderKLWan.from_pretrained(
+            model_path, subfolder="vae", torch_dtype=torch.float32
+        )
+        self.pipe = WanPipeline.from_pretrained(
+            model_path, vae=vae, torch_dtype=torch.bfloat16
+        ).to(self.device)
+        logger.warning(f"Model: {model_names[0]}")
+
+    async def get_image_output(self, params):
+        prompt = params["prompt"]
+        negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
+        output = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=480,
+            width=832,
+            num_frames=81,
+            guidance_scale=5.0,
+        ).frames[0]
+
+        # Generate a unique file name (avoids collisions)
+        file_name = str(uuid.uuid4()) + ".mp4"
+        save_path = STATIC_DIR / file_name
+        export_to_video(output, save_path, fps=15)
+        WORKER_PORT = os.environ["WORKER_PORT"]
+        WORKER_HOST = os.environ["WORKER_HOST"]
+        url = f"http://{WORKER_HOST}:{WORKER_PORT}/static/{file_name}"
+        result = {
+            "created": shortuuid.random(),
+            "data": [{"url": url}],
+            "usage": {
+                "total_tokens": 0,
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
+            },
+        }
+        return result
+
+
+if __name__ == "__main__":
+    WanWorker.run()
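For reference, the worker's return value follows the OpenAI images-response shape, with data[0].url pointing at the exported .mp4 that the new /static mount in model_worker_base.py serves. An illustrative instance (all values made up):

example_result = {
    "created": "mJ9sKx8pQ2nRtVwY7bCzAd",  # shortuuid.random(), not a Unix timestamp
    "data": [
        {"url": "http://192.168.1.10:21001/static/3f2a0c9e-7d41-4b2a-9c8e-1a2b3c4d5e6f.mp4"}
    ],
    "usage": {
        "total_tokens": 0,
        "input_tokens": 0,
        "output_tokens": 0,
        "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
    },
}

The usage counters are hard-coded to zero; video generation has no token accounting here.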

gpt_server/openai_api_protocol/custom_api_protocol.py

Lines changed: 4 additions & 0 deletions
@@ -21,6 +21,10 @@ class ImagesGenRequest(BaseModel):
         default="png",
         description="png, jpeg, or webp",
     )
+    model_type: Literal["t2v", "t2i"] = Field(
+        default="t2i",
+        description="t2v: text-to-video, t2i: text-to-image",
+    )
 
 
 # copy from https://github.com/remsky/Kokoro-FastAPI/blob/master/api/src/routers/openai_compatible.py
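A minimal reconstruction of the request model around the new field (model, prompt, and output_format are inferred from the payload built in openai_api_server.py below; the real class may carry more fields):

from typing import Literal

from pydantic import BaseModel, Field


class ImagesGenRequest(BaseModel):
    model: str
    prompt: str
    output_format: str = Field(default="png", description="png, jpeg, or webp")
    # New in this commit: routes a request to video or image generation.
    model_type: Literal["t2v", "t2i"] = Field(
        default="t2i",
        description="t2v: text-to-video, t2i: text-to-image",
    )

Defaulting to "t2i" keeps existing image clients working unchanged; only callers that want video need to send the field.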

gpt_server/serving/openai_api_server.py

Lines changed: 1 addition & 1 deletion
@@ -732,11 +732,11 @@ async def speech(request: ImagesGenRequest):
     error_check_ret = check_model(request)
     if error_check_ret is not None:
         return error_check_ret
-
     payload = {
         "model": request.model,
         "prompt": request.prompt,
         "output_format": request.output_format,
+        "model_type": request.model_type,
     }
     result = await get_images_gen(payload=payload)
     return result
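The route decorator sits outside this hunk, so the path below is an assumption (an OpenAI-style images endpoint); the port and model name are likewise placeholders. A sketch of a text-to-video call end to end:

import requests

resp = requests.post(
    "http://localhost:8082/v1/images/generations",  # assumed path and port
    json={"model": "wan", "prompt": "a cat surfing a wave", "model_type": "t2v"},
)
print(resp.json()["data"][0]["url"])  # URL of the exported .mp4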

gpt_server/utils.py

Lines changed: 20 additions & 3 deletions
@@ -10,8 +10,13 @@
 import psutil
 from rich import print
 import signal
+from pathlib import Path
 
+ENV = os.environ
 logger.add("logs/gpt_server.log", rotation="100 MB", level="INFO")
+root_dir = Path(__file__).parent
+STATIC_DIR = root_dir / "static"
+os.makedirs(STATIC_DIR, exist_ok=True)
 
 
 def kill_child_processes(parent_pid, including_parent=False):
@@ -111,8 +116,7 @@ def start_api_server(config: dict):
 
 def get_model_types():
     model_types = []
-    root_dir = os.path.dirname(__file__)
-    model_worker_path = os.path.join(root_dir, "model_worker")
+    model_worker_path = root_dir / "model_worker"
     # Walk the directory and its subdirectories
     for root, dirs, files in os.walk(model_worker_path):
         for file in files:
@@ -352,6 +356,18 @@ def is_port_in_use(port):
     return True
 
 
+def get_physical_ip():
+    import socket
+
+    local_ip = socket.gethostbyname(socket.getfqdn(socket.gethostname()))
+    return local_ip
+
+
+try:
+    local_ip = get_physical_ip()
+except Exception as e:
+    local_ip = ENV.get("local_ip", "127.0.0.1")
+
 model_type_mapping = {
     "yi": "yi",
     "qwen": "qwen",
@@ -374,12 +390,13 @@ def is_port_in_use(port):
     from lmdeploy.archs import get_model_arch
     from lmdeploy.cli.utils import get_chat_template
 
+    print(local_ip)
     ckpt = "/home/dev/model/Qwen/Qwen3-32B/"  # internlm2
     chat_template = get_chat_template(ckpt)
     model_type = get_names_from_model(ckpt)
     arch = get_model_arch(ckpt)
+
     print(chat_template)
     # print(arch)
     print(model_type)
     print(model_type[1] == "base")
-    print()
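The local_ip resolution is best-effort: resolve the host's FQDN first, fall back to a local_ip environment variable, then to loopback. The same chain as a standalone sketch:

import os
import socket


def resolve_local_ip() -> str:
    # Mirrors get_physical_ip() plus the commit's try/except fallback.
    try:
        return socket.gethostbyname(socket.getfqdn(socket.gethostname()))
    except Exception:
        return os.environ.get("local_ip", "127.0.0.1")


print(resolve_local_ip())

On machines whose hostname maps to a loopback entry in /etc/hosts this can still return 127.x.x.x, so the local_ip environment variable serves as a manual override.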
