Skip to content

Commit 87c104b

Browse files
authored
add support for "@image" reference format in Kling Omni API nodes (#11082)
1 parent 19f2192 commit 87c104b

File tree

2 files changed

+155
-13
lines changed

2 files changed

+155
-13
lines changed

comfy_api_nodes/apis/kling_api.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,41 @@ class TaskStatusVideoResult(BaseModel):
4646
url: str | None = Field(None, description="URL for generated video")
4747

4848

49-
class TaskStatusVideoResults(BaseModel):
49+
class TaskStatusImageResult(BaseModel):
    """A single generated image entry inside a Kling task result."""

    # Number of the image within the result set (both fields are required).
    index: int = Field(..., description="Image Number,0-9")
    # Location the generated image can be fetched from.
    url: str = Field(..., description="URL for generated image")
52+
53+
54+
class OmniTaskStatusResults(BaseModel):
    """Result payload of an Omni task: generated videos and/or images.

    Both lists are optional and default to ``None``.
    """

    videos: list[TaskStatusVideoResult] | None = Field(default=None)
    images: list[TaskStatusImageResult] | None = Field(default=None)
5157

5258

53-
class TaskStatusVideoResponseData(BaseModel):
59+
class OmniTaskStatusResponseData(BaseModel):
    """The ``data`` section of an Omni task response.

    Every field is optional and defaults to ``None``.
    """

    created_at: int | None = Field(default=None, description="Task creation time")
    updated_at: int | None = Field(default=None, description="Task update time")
    task_status: str | None = None
    task_status_msg: str | None = Field(
        default=None,
        description="Additional failure reason. Only for polling endpoint.",
    )
    task_id: str | None = Field(default=None, description="Task ID")
    task_result: OmniTaskStatusResults | None = Field(default=None)
6066

6167

62-
class TaskStatusVideoResponse(BaseModel):
68+
class OmniTaskStatusResponse(BaseModel):
    """Top-level envelope of an Omni task response.

    Every field is optional and defaults to ``None``.
    """

    code: int | None = Field(default=None, description="Error code")
    message: str | None = Field(default=None, description="Error message")
    request_id: str | None = Field(default=None, description="Request ID")
    data: OmniTaskStatusResponseData | None = Field(default=None)
73+
74+
75+
class OmniImageParamImage(BaseModel):
    """One entry of an Omni image request's ``image_list``."""

    # Required string identifying the reference image.
    image: str = Field(...)
77+
78+
79+
class OmniProImageRequest(BaseModel):
    """Request body for creating a Kling Omni image task."""

    model_name: str = Field(..., description="kling-image-o1")
    resolution: str = Field(..., description="'1k' or '2k'")
    # Required (no default), but ``None`` is an accepted value.
    aspect_ratio: str | None = Field(...)
    prompt: str = Field(...)
    mode: str = Field("pro")
    # Bounded on both sides: without ``ge=1`` a zero or negative image count
    # would pass validation here and only be rejected by the remote service.
    n: int | None = Field(1, ge=1, le=9)
    # Required (no default), but ``None`` is accepted; at most 10 entries.
    image_list: list[OmniImageParamImage] | None = Field(..., max_length=10)

comfy_api_nodes/nodes_kling.py

Lines changed: 130 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import logging
88
import math
9+
import re
910

1011
import torch
1112
from typing_extensions import override
@@ -49,12 +50,14 @@
4950
KlingSingleImageEffectModelName,
5051
)
5152
from comfy_api_nodes.apis.kling_api import (
53+
OmniImageParamImage,
5254
OmniParamImage,
5355
OmniParamVideo,
5456
OmniProFirstLastFrameRequest,
57+
OmniProImageRequest,
5558
OmniProReferences2VideoRequest,
5659
OmniProText2VideoRequest,
57-
TaskStatusVideoResponse,
60+
OmniTaskStatusResponse,
5861
)
5962
from comfy_api_nodes.util import (
6063
ApiEndpoint,
@@ -210,16 +213,46 @@
210213
}
211214

212215

213-
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusVideoResponse) -> IO.NodeOutput:
216+
def normalize_omni_prompt_references(prompt: str) -> str:
    """
    Rewrites Kling Omni-style placeholders used in the app, like:

        @image, @image1, @image2, ... @imageN
        @video, @video1, @video2, ... @videoN

    into the API-compatible form:

        <<<image_1>>>, <<<image_2>>>, ...
        <<<video_1>>>, <<<video_2>>>, ...

    A bare ``@image`` / ``@video`` refers to index 1. Indices are emitted in
    canonical ASCII decimal form, so ``@image01`` becomes ``<<<image_1>>>``.

    This is a UX shim for ComfyUI so users can type the same syntax as in the Kling app.

    Args:
        prompt: Prompt text; an empty or ``None`` value is returned unchanged.

    Returns:
        The prompt with every recognised reference rewritten.
    """
    if not prompt:
        return prompt

    def _make_repl(kind: str):
        """Build the ``re.sub`` callback for one reference kind."""

        def _repl(match: re.Match) -> str:
            digits = match.group("idx")
            if not digits:
                index = "1"
            else:
                # ``\d`` also matches zero-padded and non-ASCII decimal digits;
                # int() canonicalises both to plain ASCII without leading zeros.
                try:
                    index = str(int(digits))
                except ValueError:
                    # int() can refuse extremely long digit runs; keep them as typed.
                    index = digits
            return f"<<<{kind}_{index}>>>"

        return _repl

    # Images are rewritten before videos, in two separate passes.
    for kind in ("image", "video"):
        # (?<!\w) avoids matching e.g. "user@image.com"
        # (?!\w) makes sure we only match @image / @image<digits> and not @imageFoo
        prompt = re.sub(rf"(?<!\w)@{kind}(?P<idx>\d*)(?!\w)", _make_repl(kind), prompt)
    return prompt
243+
244+
245+
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: OmniTaskStatusResponse) -> IO.NodeOutput:
    """Poll a submitted Kling Omni video task and return its first video.

    Args:
        cls: Node class forwarded to ``poll_op``.
        response: Response received from the task-creation request.

    Returns:
        Node output wrapping the video downloaded from the first result URL.

    Raises:
        RuntimeError: If the creation response carries an error code or lacks
            a task ID, or if the polled task yields no video URL.
    """
    if response.code:
        raise RuntimeError(
            f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
        )
    # `data` and `task_id` are optional on the response model; fail with a clear
    # message instead of an AttributeError or a poll against ".../None".
    if response.data is None or not response.data.task_id:
        raise RuntimeError(
            f"Kling response did not include a task ID. Message: {response.message}, Data: {response.data}"
        )
    final_response = await poll_op(
        cls,
        ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"),
        response_model=OmniTaskStatusResponse,
        status_extractor=lambda r: (r.data.task_status if r.data else None),
        max_poll_attempts=160,
    )
    # Every level of the result chain is optional, so check before indexing.
    task_data = final_response.data
    task_result = task_data.task_result if task_data else None
    video_url = task_result.videos[0].url if task_result and task_result.videos else None
    if not video_url:
        status_msg = task_data.task_status_msg if task_data else None
        raise RuntimeError(f"Kling task finished without a video result. Message: {status_msg}")
    return IO.NodeOutput(await download_url_to_video_output(video_url))
225258

@@ -801,7 +834,7 @@ async def execute(
801834
response = await sync_op(
802835
cls,
803836
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
804-
response_model=TaskStatusVideoResponse,
837+
response_model=OmniTaskStatusResponse,
805838
data=OmniProText2VideoRequest(
806839
model_name=model_name,
807840
prompt=prompt,
@@ -864,6 +897,7 @@ async def execute(
864897
end_frame: Input.Image | None = None,
865898
reference_images: Input.Image | None = None,
866899
) -> IO.NodeOutput:
900+
prompt = normalize_omni_prompt_references(prompt)
867901
validate_string(prompt, min_length=1, max_length=2500)
868902
if end_frame is not None and reference_images is not None:
869903
raise ValueError("The 'end_frame' input cannot be used simultaneously with 'reference_images'.")
@@ -895,7 +929,7 @@ async def execute(
895929
response = await sync_op(
896930
cls,
897931
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
898-
response_model=TaskStatusVideoResponse,
932+
response_model=OmniTaskStatusResponse,
899933
data=OmniProFirstLastFrameRequest(
900934
model_name=model_name,
901935
prompt=prompt,
@@ -950,6 +984,7 @@ async def execute(
950984
duration: int,
951985
reference_images: Input.Image,
952986
) -> IO.NodeOutput:
987+
prompt = normalize_omni_prompt_references(prompt)
953988
validate_string(prompt, min_length=1, max_length=2500)
954989
if get_number_of_images(reference_images) > 7:
955990
raise ValueError("The maximum number of reference images is 7.")
@@ -962,7 +997,7 @@ async def execute(
962997
response = await sync_op(
963998
cls,
964999
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
965-
response_model=TaskStatusVideoResponse,
1000+
response_model=OmniTaskStatusResponse,
9661001
data=OmniProReferences2VideoRequest(
9671002
model_name=model_name,
9681003
prompt=prompt,
@@ -1023,6 +1058,7 @@ async def execute(
10231058
keep_original_sound: bool,
10241059
reference_images: Input.Image | None = None,
10251060
) -> IO.NodeOutput:
1061+
prompt = normalize_omni_prompt_references(prompt)
10261062
validate_string(prompt, min_length=1, max_length=2500)
10271063
validate_video_duration(reference_video, min_duration=3.0, max_duration=10.05)
10281064
validate_video_dimensions(reference_video, min_width=720, min_height=720, max_width=2160, max_height=2160)
@@ -1045,7 +1081,7 @@ async def execute(
10451081
response = await sync_op(
10461082
cls,
10471083
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
1048-
response_model=TaskStatusVideoResponse,
1084+
response_model=OmniTaskStatusResponse,
10491085
data=OmniProReferences2VideoRequest(
10501086
model_name=model_name,
10511087
prompt=prompt,
@@ -1103,6 +1139,7 @@ async def execute(
11031139
keep_original_sound: bool,
11041140
reference_images: Input.Image | None = None,
11051141
) -> IO.NodeOutput:
1142+
prompt = normalize_omni_prompt_references(prompt)
11061143
validate_string(prompt, min_length=1, max_length=2500)
11071144
validate_video_duration(video, min_duration=3.0, max_duration=10.05)
11081145
validate_video_dimensions(video, min_width=720, min_height=720, max_width=2160, max_height=2160)
@@ -1125,7 +1162,7 @@ async def execute(
11251162
response = await sync_op(
11261163
cls,
11271164
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
1128-
response_model=TaskStatusVideoResponse,
1165+
response_model=OmniTaskStatusResponse,
11291166
data=OmniProReferences2VideoRequest(
11301167
model_name=model_name,
11311168
prompt=prompt,
@@ -1138,6 +1175,90 @@ async def execute(
11381175
return await finish_omni_video_task(cls, response)
11391176

11401177

1178+
class OmniProImageNode(IO.ComfyNode):
    """API node that creates or edits an image with the Kling Omni image model."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node's inputs, outputs and hidden auth fields."""
        return IO.Schema(
            node_id="KlingOmniProImageNode",
            display_name="Kling Omni Image (Pro)",
            category="api node/image/Kling",
            description="Create or edit images with the latest model from Kling.",
            inputs=[
                IO.Combo.Input("model_name", options=["kling-image-o1"]),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A text prompt describing the image content. "
                    "This can include both positive and negative descriptions.",
                ),
                IO.Combo.Input("resolution", options=["1K", "2K"]),
                IO.Combo.Input(
                    "aspect_ratio",
                    options=["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3", "21:9"],
                ),
                IO.Image.Input(
                    "reference_images",
                    tooltip="Up to 10 additional reference images.",
                    optional=True,
                ),
            ],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model_name: str,
        prompt: str,
        resolution: str,
        aspect_ratio: str,
        reference_images: Input.Image | None = None,
    ) -> IO.NodeOutput:
        """Submit an Omni image task, wait for it and return the first image.

        Args:
            model_name: Model identifier sent to the API.
            prompt: Prompt text; ``@image``-style references are normalised first.
            resolution: Resolution choice; lower-cased before being sent.
            aspect_ratio: Aspect ratio string sent to the API.
            reference_images: Optional batch of at most 10 reference images.

        Returns:
            Node output wrapping the first generated image.

        Raises:
            ValueError: If more than 10 reference images are supplied.
            RuntimeError: If the API reports an error code, returns no task ID,
                or the finished task contains no images.
        """
        prompt = normalize_omni_prompt_references(prompt)
        validate_string(prompt, min_length=1, max_length=2500)
        image_list: list[OmniImageParamImage] = []
        if reference_images is not None:
            if get_number_of_images(reference_images) > 10:
                raise ValueError("The maximum number of reference images is 10.")
            # Validate every image before uploading any of them.
            for image in reference_images:
                validate_image_dimensions(image, min_width=300, min_height=300)
                validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
            # Each uploaded item is passed on as the `image` string of a request entry.
            uploaded = await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image")
            image_list = [OmniImageParamImage(image=item) for item in uploaded]
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"),
            response_model=OmniTaskStatusResponse,
            data=OmniProImageRequest(
                model_name=model_name,
                prompt=prompt,
                resolution=resolution.lower(),
                aspect_ratio=aspect_ratio,
                # An empty list is sent as None.
                image_list=image_list if image_list else None,
            ),
        )
        if response.code:
            raise RuntimeError(
                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
            )
        # `data` and `task_id` are optional on the response model; fail with a clear
        # message instead of an AttributeError or a poll against ".../None".
        if response.data is None or not response.data.task_id:
            raise RuntimeError(
                f"Kling response did not include a task ID. Message: {response.message}, Data: {response.data}"
            )
        final_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/images/omni-image/{response.data.task_id}"),
            response_model=OmniTaskStatusResponse,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        # Every level of the result chain is optional, so check before indexing.
        task_data = final_response.data
        task_result = task_data.task_result if task_data else None
        if task_result is None or not task_result.images:
            status_msg = task_data.task_status_msg if task_data else None
            raise RuntimeError(f"Kling task finished without an image result. Message: {status_msg}")
        return IO.NodeOutput(await download_url_to_image_tensor(task_result.images[0].url))
1260+
1261+
11411262
class KlingCameraControlT2VNode(IO.ComfyNode):
11421263
"""
11431264
Kling Text to Video Camera Control Node. This node is a text to video node, but it supports controlling the camera.
@@ -1935,6 +2056,7 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
19352056
OmniProImageToVideoNode,
19362057
OmniProVideoToVideoNode,
19372058
OmniProEditVideoNode,
2059+
# OmniProImageNode, # need support from backend
19382060
]
19392061

19402062

0 commit comments

Comments
 (0)