Skip to content

Commit 79e9454

Browse files
authored
feat(api-nodes): add WAN2.6 ReferenceToVideo (#11644)
1 parent ce0000c commit 79e9454

File tree

2 files changed

+161
-1
lines changed

2 files changed

+161
-1
lines changed

comfy_api_nodes/nodes_wan.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
poll_op,
1414
sync_op,
1515
tensor_to_base64_string,
16+
upload_video_to_comfyapi,
1617
validate_audio_duration,
18+
validate_video_duration,
1719
)
1820

1921

@@ -41,6 +43,12 @@ class Image2VideoInputField(BaseModel):
4143
audio_url: str | None = Field(None)
4244

4345

46+
class Reference2VideoInputField(BaseModel):
    """``input`` section of a reference-to-video task creation request."""

    # Required text prompt.
    prompt: str = Field(...)
    # Optional negative prompt; defaults to None when not supplied.
    negative_prompt: str | None = Field(default=None)
    # Required list of reference video URLs.
    reference_video_urls: list[str] = Field(...)
50+
51+
4452
class Txt2ImageParametersField(BaseModel):
4553
size: str = Field(...)
4654
n: int = Field(1, description="Number of images to generate.") # we support only value=1
@@ -76,6 +84,14 @@ class Image2VideoParametersField(BaseModel):
7684
shot_type: str = Field("single")
7785

7886

87+
class Reference2VideoParametersField(BaseModel):
    """``parameters`` section of a reference-to-video task creation request."""

    # Required output size string.
    size: str = Field(...)
    # Duration constrained to the inclusive range 5..15; defaults to 5.
    # NOTE(review): unit is presumably seconds -- confirm against the API docs.
    duration: int = Field(default=5, ge=5, le=15)
    # Shot type; defaults to "single".
    shot_type: str = Field(default="single")
    # Required seed, constrained to 0..2147483647 (2**31 - 1).
    seed: int = Field(..., ge=0, le=2147483647)
    # Watermark flag; off by default.
    watermark: bool = Field(default=False)
93+
94+
7995
class Text2ImageTaskCreationRequest(BaseModel):
8096
model: str = Field(...)
8197
input: Text2ImageInputField = Field(...)
@@ -100,6 +116,12 @@ class Image2VideoTaskCreationRequest(BaseModel):
100116
parameters: Image2VideoParametersField = Field(...)
101117

102118

119+
class Reference2VideoTaskCreationRequest(BaseModel):
    """Top-level body of a reference-to-video task creation request.

    All three members are required.
    """

    # Model identifier string.
    model: str = Field(...)
    # Prompt(s) and reference video URLs.
    input: Reference2VideoInputField = Field(...)
    # Generation settings (size, duration, shot type, seed, watermark).
    parameters: Reference2VideoParametersField = Field(...)
123+
124+
103125
class TaskCreationOutputField(BaseModel):
104126
task_id: str = Field(...)
105127
task_status: str = Field(...)
@@ -721,6 +743,143 @@ async def execute(
721743
return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
722744

723745

746+
class WanReferenceVideoApi(IO.ComfyNode):
    """API node that generates a video from a prompt plus reference videos.

    The node uploads the supplied reference videos, creates a generation task
    through the ``/proxy/wan`` endpoints, polls the task, and returns the
    downloaded result video.
    """

    @classmethod
    def define_schema(cls):
        """Return the node schema: identity, inputs, the video output and hidden auth fields."""
        model_input = IO.Combo.Input("model", options=["wan2.6-r2v"])
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
            default="",
            tooltip="Prompt describing the elements and visual features. Supports English and Chinese. "
            "Use identifiers such as `character1` and `character2` to refer to the reference characters.",
        )
        negative_prompt_input = IO.String.Input(
            "negative_prompt",
            multiline=True,
            default="",
            tooltip="Negative prompt describing what to avoid.",
        )
        # Growing group of video slots named after the identifiers mentioned in the prompt tooltip.
        reference_videos_input = IO.Autogrow.Input(
            "reference_videos",
            template=IO.Autogrow.TemplateNames(
                IO.Video.Input("reference_video"),
                names=["character1", "character2", "character3"],
                min=1,
            ),
        )
        # Each label carries "(WxH)"; execute() extracts the two numbers from the parentheses.
        size_input = IO.Combo.Input(
            "size",
            options=[
                "720p: 1:1 (960x960)",
                "720p: 16:9 (1280x720)",
                "720p: 9:16 (720x1280)",
                "720p: 4:3 (1088x832)",
                "720p: 3:4 (832x1088)",
                "1080p: 1:1 (1440x1440)",
                "1080p: 16:9 (1920x1080)",
                "1080p: 9:16 (1080x1920)",
                "1080p: 4:3 (1632x1248)",
                "1080p: 3:4 (1248x1632)",
            ],
        )
        # min=5, max=10, step=5: the slider only offers the values 5 and 10.
        duration_input = IO.Int.Input(
            "duration",
            default=5,
            min=5,
            max=10,
            step=5,
            display_mode=IO.NumberDisplay.slider,
        )
        seed_input = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            step=1,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
        )
        shot_type_input = IO.Combo.Input(
            "shot_type",
            options=["single", "multi"],
            tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
            "single continuous shot or multiple shots with cuts.",
        )
        watermark_input = IO.Boolean.Input(
            "watermark",
            default=False,
            tooltip="Whether to add an AI-generated watermark to the result.",
        )
        return IO.Schema(
            node_id="WanReferenceVideoApi",
            display_name="Wan Reference to Video",
            category="api node/video/Wan",
            description="Use the character and voice from input videos, combined with a prompt, "
            "to generate a new video that maintains character consistency.",
            inputs=[
                model_input,
                prompt_input,
                negative_prompt_input,
                reference_videos_input,
                size_input,
                duration_input,
                seed_input,
                shot_type_input,
                watermark_input,
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        negative_prompt: str,
        reference_videos: IO.Autogrow.Type,
        size: str,
        duration: int,
        seed: int,
        shot_type: str,
        watermark: bool,
    ):
        """Create a reference-to-video task, wait for it, and return the resulting video.

        Args:
            model: Model identifier sent in the request.
            prompt: Text prompt.
            negative_prompt: Negative prompt, forwarded as given.
            reference_videos: Keyed collection of reference videos; it is iterated
                and indexed by the keys it yields.
            size: Size label containing "(WxH)"; sent to the API as "W*H".
            duration: Requested duration, forwarded as given.
            seed: Seed value, forwarded as given.
            shot_type: Shot type, forwarded as given.
            watermark: Watermark flag, forwarded as given.

        Returns:
            IO.NodeOutput wrapping the video downloaded from the task's ``video_url``.

        Raises:
            Exception: If the task creation response carries no ``output``.
        """
        # First pass: check every clip (min_duration=2, max_duration=30) before any upload starts.
        for slot in reference_videos:
            validate_video_duration(reference_videos[slot], min_duration=2, max_duration=30)
        # Second pass: upload one clip at a time, keeping the iteration order of the input.
        reference_video_urls = [
            await upload_video_to_comfyapi(cls, reference_videos[slot]) for slot in reference_videos
        ]

        size_match = RES_IN_PARENS.search(size)
        width, height = size_match.groups()

        create_endpoint = ApiEndpoint(
            path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis",
            method="POST",
        )
        task_input = Reference2VideoInputField(
            prompt=prompt,
            negative_prompt=negative_prompt,
            reference_video_urls=reference_video_urls,
        )
        task_parameters = Reference2VideoParametersField(
            size=f"{width}*{height}",
            duration=duration,
            shot_type=shot_type,
            watermark=watermark,
            seed=seed,
        )
        creation_request = Reference2VideoTaskCreationRequest(
            model=model,
            input=task_input,
            parameters=task_parameters,
        )
        initial_response = await sync_op(
            cls,
            create_endpoint,
            response_model=TaskCreationResponse,
            data=creation_request,
        )
        if not initial_response.output:
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")

        task_id = initial_response.output.task_id
        status_endpoint = ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{task_id}")
        response = await poll_op(
            cls,
            status_endpoint,
            response_model=VideoTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            poll_interval=6,
            max_poll_attempts=280,
        )
        result_video = await download_url_to_video_output(response.output.video_url)
        return IO.NodeOutput(result_video)
881+
882+
724883
class WanApiExtension(ComfyExtension):
725884
@override
726885
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -729,6 +888,7 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
729888
WanImageToImageApi,
730889
WanTextToVideoApi,
731890
WanImageToVideoApi,
891+
WanReferenceVideoApi,
732892
]
733893

734894

comfy_api_nodes/util/upload_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ async def upload_video_to_comfyapi(
119119
raise ValueError(f"Could not verify video duration from source: {e}") from e
120120

121121
upload_mime_type = f"video/{container.value.lower()}"
122-
filename = f"uploaded_video.{container.value.lower()}"
122+
filename = f"{uuid.uuid4()}.{container.value.lower()}"
123123

124124
# Convert VideoInput to BytesIO using specified container/codec
125125
video_bytes_io = BytesIO()

0 commit comments

Comments
 (0)