Skip to content

Commit 0ba8d28

Browse files
committed
support qwen_image
1 parent c3071cd commit 0ba8d28

File tree

5 files changed

+156
-25
lines changed

5 files changed

+156
-25
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
<summary><b>2025</b></summary>
6262

6363
```plaintext
64+
2025-10-25 支持了 qwen_image 文生图模型
6465
2025-9-7 支持了 图片编辑模型 (代码样例见gpt_server/tests/test_image_edit.py)
6566
2025-8-8 初步支持了 embedding 的 vllm 加速
6667
2025-6-17 支持了 jina-reranker-m0 全球首个支持多模态多语言的重排模型
@@ -367,6 +368,7 @@ Chat UI界面:
367368
| Models / BackEnd | model_type |
368369
| :--------------: | :--------: |
369370
| flux | flux |
371+
| qwen_image | qwen_image |
370372

371373
<br>
372374

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import os
2+
from typing import List
3+
import uuid
4+
from loguru import logger
5+
import shortuuid
6+
from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
7+
from gpt_server.model_worker.utils import pil_to_base64
8+
import torch
9+
from diffusers import DiffusionPipeline
10+
from gpt_server.utils import STATIC_DIR
11+
12+
# Directory three levels above this file.
# NOTE(review): not referenced anywhere in the visible part of this module — confirm whether it is still needed.
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

# Quality-boosting suffix appended to the user's prompt, keyed by prompt language.
positive_magic = {
    "en": ", Ultra HD, 4K, cinematic composition.",  # for english prompt
    "zh": ", 超清,4K,电影级构图.",  # for chinese prompt
}

# Output resolution for each aspect ratio, stored as (width, height) in pixels.
aspect_ratios = {
    "1:1": (1328, 1328),
    "16:9": (1664, 928),
    "9:16": (928, 1664),
    "4:3": (1472, 1140),
    "3:4": (1140, 1472),
    "3:2": (1584, 1056),
    "2:3": (1056, 1584),
}

# Resolution used for every generated image: fixed at module import time to 16:9.
width, height = aspect_ratios["16:9"]
30+
import re
31+
32+
33+
# Compiled once at import time instead of on every call.
# Matches any character in the range U+4E00..U+9FFF.
_CHINESE_CHAR_RE = re.compile(r"[\u4e00-\u9fff]")


def contains_chinese(text: str) -> bool:
    """Return True if *text* contains at least one character in U+4E00..U+9FFF.

    Args:
        text: The string to inspect.

    Returns:
        True when any character of ``text`` falls in the range U+4E00..U+9FFF,
        otherwise False (including for the empty string).
    """
    return _CHINESE_CHAR_RE.search(text) is not None
36+
37+
38+
class QwenImageWorker(ModelWorkerBase):
    """Worker that turns a text prompt into an image with a diffusers pipeline.

    The pipeline is loaded once in ``__init__`` and reused by every call to
    ``get_image_output``.
    """

    # Values of ``response_format`` that ``get_image_output`` knows how to produce.
    _RESPONSE_FORMATS = ("b64_json", "url")

    def __init__(
        self,
        controller_addr: str,
        worker_addr: str,
        worker_id: str,
        model_path: str,
        model_names: List[str],
        limit_worker_concurrency: int,
        conv_template: str = None,  # type: ignore
    ):
        """Initialise the base worker and load the diffusion pipeline.

        Args:
            controller_addr: Forwarded unchanged to ``ModelWorkerBase.__init__``.
            worker_addr: Forwarded unchanged to ``ModelWorkerBase.__init__``.
            worker_id: Forwarded unchanged to ``ModelWorkerBase.__init__``.
            model_path: Forwarded to the base class and also used as the
                location the pipeline weights are loaded from.
            model_names: Forwarded to the base class; the first entry is logged.
            limit_worker_concurrency: Forwarded unchanged to the base class.
            conv_template: Forwarded unchanged to the base class.
        """
        super().__init__(
            controller_addr,
            worker_addr,
            worker_id,
            model_path,
            model_names,
            limit_worker_concurrency,
            conv_template,
            model_type="image",
        )
        # Use the GPU when one is visible to torch, otherwise fall back to CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.pipe = DiffusionPipeline.from_pretrained(
            model_path, torch_dtype=torch.bfloat16
        ).to(self.device)

        logger.warning(f"模型:{model_names[0]}")

    @staticmethod
    def _build_result(data_item: dict) -> dict:
        """Wrap a single image payload in the response envelope.

        Args:
            data_item: Either ``{"b64_json": ...}`` or ``{"url": ...}``.

        Returns:
            A dict with ``created``, ``data`` (a one-element list holding
            ``data_item``) and an all-zero ``usage`` section.
        """
        return {
            "created": shortuuid.random(),
            "data": [data_item],
            "usage": {
                "total_tokens": 0,
                "input_tokens": 0,
                "output_tokens": 0,
                "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
            },
        }

    async def get_image_output(self, params):
        """Generate one image for ``params["prompt"]`` and package it.

        Args:
            params: Request mapping. ``"prompt"`` is required;
                ``"response_format"`` is optional and defaults to
                ``"b64_json"``.

        Returns:
            The response envelope containing either the base64-encoded image
            (``"b64_json"``) or a URL to a PNG saved under ``STATIC_DIR``
            (``"url"``). An empty dict is returned when ``response_format``
            is not one of the supported values.

        Raises:
            KeyError: If ``"prompt"`` is missing from ``params``, or if the
                ``WORKER_HOST`` / ``WORKER_PORT`` environment variables are
                not set when ``response_format`` is ``"url"``.
        """
        response_format = params.get("response_format", "b64_json")
        # Reject unknown formats up front so an unusable request does not pay
        # for a full inference run only to come back with nothing.
        if response_format not in self._RESPONSE_FORMATS:
            logger.error(
                f"unsupported response_format: {response_format!r}; "
                f"expected one of {self._RESPONSE_FORMATS}"
            )
            return {}

        prompt = params["prompt"]
        # Append the quality suffix matching the prompt's language.
        language = "zh" if contains_chinese(prompt) else "en"
        prompt += positive_magic[language]

        # NOTE: the generator is seeded with a constant, so a given prompt
        # always yields the same image.
        image = self.pipe(
            prompt,
            negative_prompt=" ",
            height=height,
            width=width,
            num_inference_steps=50,
            true_cfg_scale=4.0,
            generator=torch.Generator(self.device).manual_seed(0),
        ).images[0]

        if response_format == "b64_json":
            # Convert the PIL image to base64.
            return self._build_result({"b64_json": pil_to_base64(pil_img=image)})

        # response_format == "url": save the image under a unique file name
        # (avoids collisions) and return a link served by this worker.
        file_name = str(uuid.uuid4()) + ".png"
        save_path = STATIC_DIR / file_name
        image.save(save_path, format="PNG")
        WORKER_PORT = os.environ["WORKER_PORT"]
        WORKER_HOST = os.environ["WORKER_HOST"]
        url = f"http://{WORKER_HOST}:{WORKER_PORT}/static/{file_name}"
        return self._build_result({"url": url})
117+
118+
119+
# Script entry point: start the worker when this module is executed directly.
# NOTE(review): `run` is not defined in this file — presumably inherited from ModelWorkerBase; confirm.
if __name__ == "__main__":
    QwenImageWorker.run()

gpt_server/script/config_example.yaml

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ model_worker_args:
2525
limit_worker_concurrency: 1024 # worker的最大并发数,默认为 1024
2626

2727
models:
28+
# --------------- 支持的大语言模型样例 ---------------
2829
- qwen:
2930
# 大语言模型
3031
#自定义的模型名称
@@ -63,7 +64,7 @@ models:
6364
# - 1
6465

6566

66-
67+
# --------------- 支持的多模态模型样例 ---------------
6768
- internvl2:
6869
# 多模态模型
6970
#自定义的模型名称
@@ -80,7 +81,7 @@ models:
8081
- gpus:
8182
# - 1
8283
- 0
83-
84+
# --------------- 支持的rerank模型样例 ---------------
8485
- bge-reranker-base:
8586
# rerank模型
8687
alias: null # 别名
@@ -93,7 +94,7 @@ models:
9394
workers:
9495
- gpus:
9596
- 2
96-
# 部署 qwen3-reranker 样例
97+
9798
- qwen3-reranker:
9899
alias: null
99100
enable: true
@@ -108,7 +109,7 @@ models:
108109
workers:
109110
- gpus:
110111
- 6
111-
112+
# --------------- 支持的多模态多语言的重排模型样例 ---------------
112113
- jina-reranker:
113114
# 多模态多语言的重排模型,这个模型task_type 只能是 auto
114115
alias: null
@@ -122,9 +123,8 @@ models:
122123
workers:
123124
- gpus:
124125
- 5
125-
126+
# --------------- 支持的文本embedding模型样例 ---------------
126127
- acge_text_embedding:
127-
# 文本embedding模型
128128
alias: text-embedding-ada-002 # 别名
129129
enable: true # false true
130130
model_config:
@@ -136,9 +136,8 @@ models:
136136
workers:
137137
- gpus:
138138
- 2
139-
139+
# --------------- 支持的vl-embedding 模型样例 ---------------
140140
- bge-vl:
141-
# vl-embedding 模型
142141
alias: null
143142
enable: true
144143
model_config:
@@ -149,9 +148,8 @@ models:
149148
workers:
150149
- gpus:
151150
- 2
152-
151+
# --------------- 支持的文本审核模型样例 ---------------
153152
- text-moderation:
154-
# 文本审核模型
155153
alias: omni-moderation-latest
156154
enable: true
157155
model_config:
@@ -162,8 +160,8 @@ models:
162160
workers:
163161
- gpus:
164162
- 2
163+
# --------------- 支持的ASR模型样例 ---------------
165164
- SenseVoiceSmall:
166-
## 最新支持ASR模型
167165
alias: null
168166
enable: true
169167
model_config:
@@ -175,8 +173,8 @@ models:
175173
workers:
176174
- gpus:
177175
- 2
176+
# --------------- 支持的TTS模型样例 ---------------
178177
- tts:
179-
# TTS 模型的配置方式
180178
alias: null
181179
enable: true
182180
model_config:
@@ -187,9 +185,9 @@ models:
187185
workers:
188186
- gpus:
189187
- 6
190-
188+
# --------------- 支持的文生图模型样例 ---------------
191189
- flux:
192-
# 文生图模型
190+
193191
alias: null
194192
enable: true
195193
model_config:
@@ -201,8 +199,19 @@ models:
201199
- gpus:
202200
- 7
203201

202+
- qwen-image:
203+
alias: null
204+
enable: true
205+
model_config:
206+
model_name_or_path: /home/dev/model/Qwen/Qwen-Image/
207+
model_type: qwen_image
208+
work_mode: hf
209+
device: gpu
210+
workers:
211+
- gpus:
212+
- 7
213+
# --------------- 支持的图片编辑模型样例 ---------------
204214
- image-edit:
205-
# 图片编辑模型
206215
alias: null
207216
enable: true
208217
model_config:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ dependencies = [
2828
"sglang[all]>=0.5.3.post1",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
31-
"diffusers>=0.35.1",
31+
"diffusers>=0.35.2",
3232
#"sqlmodel>=0.0.24",
3333
"autoawq>=0.2.9",
3434
]
@@ -38,7 +38,7 @@ default-groups = [] # 默认只安装dependencies中的库
3838
prerelease = "allow"
3939
override-dependencies = [
4040
"setuptools==77.0.3",
41-
"transformers==4.57.0", # infinity-emb
41+
"transformers==4.57.1", # infinity-emb
4242
"soundfile==0.13.1", # infinity
4343
"xgrammar==0.1.24", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4444
"outlines-core==0.2.11", # sglang 和 vllm 的冲突

uv.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)