1313 poll_op ,
1414 sync_op ,
1515 tensor_to_base64_string ,
16+ upload_video_to_comfyapi ,
1617 validate_audio_duration ,
18+ validate_video_duration ,
1719)
1820
1921
@@ -41,6 +43,12 @@ class Image2VideoInputField(BaseModel):
4143 audio_url : str | None = Field (None )
4244
4345
class Reference2VideoInputField(BaseModel):
    """Prompt text and reference video URLs for a reference-to-video task."""

    # Required text prompt.
    prompt: str = Field(...)
    # Optional; defaults to None when omitted.
    negative_prompt: str | None = Field(default=None)
    # Required list of reference video URLs.
    reference_video_urls: list[str] = Field(...)
50+
51+
4452class Txt2ImageParametersField (BaseModel ):
4553 size : str = Field (...)
4654 n : int = Field (1 , description = "Number of images to generate." ) # we support only value=1
@@ -76,6 +84,14 @@ class Image2VideoParametersField(BaseModel):
7684 shot_type : str = Field ("single" )
7785
7886
class Reference2VideoParametersField(BaseModel):
    """Generation parameters for a reference-to-video task."""

    # Required output size string.
    size: str = Field(...)
    # Defaults to 5; validated to lie in the inclusive range 5..15.
    duration: int = Field(default=5, ge=5, le=15)
    shot_type: str = Field(default="single")
    # Required; validated to lie in the inclusive range 0..2147483647.
    seed: int = Field(..., ge=0, le=2_147_483_647)
    watermark: bool = Field(default=False)
93+
94+
7995class Text2ImageTaskCreationRequest (BaseModel ):
8096 model : str = Field (...)
8197 input : Text2ImageInputField = Field (...)
@@ -100,6 +116,12 @@ class Image2VideoTaskCreationRequest(BaseModel):
100116 parameters : Image2VideoParametersField = Field (...)
101117
102118
class Reference2VideoTaskCreationRequest(BaseModel):
    """Request body for creating a reference-to-video task.

    All three fields are required: the model name, the input section, and the
    parameters section.
    """

    model: str = Field(...)
    input: Reference2VideoInputField = Field(...)
    parameters: Reference2VideoParametersField = Field(...)
123+
124+
103125class TaskCreationOutputField (BaseModel ):
104126 task_id : str = Field (...)
105127 task_status : str = Field (...)
@@ -721,6 +743,143 @@ async def execute(
721743 return IO .NodeOutput (await download_url_to_video_output (response .output .video_url ))
722744
723745
class WanReferenceVideoApi(IO.ComfyNode):
    """API node that generates a video from a prompt and one or more reference videos.

    The reference videos are duration-checked and uploaded, their URLs are
    submitted together with the prompt to the Wan video-synthesis endpoint, the
    created task is polled, and the resulting video is downloaded and returned.
    """

    @classmethod
    def define_schema(cls):
        """Return the node schema: inputs, a single video output and hidden auth fields."""
        return IO.Schema(
            node_id="WanReferenceVideoApi",
            display_name="Wan Reference to Video",
            category="api node/video/Wan",
            description="Use the character and voice from input videos, combined with a prompt, "
            "to generate a new video that maintains character consistency.",
            inputs=[
                IO.Combo.Input("model", options=["wan2.6-r2v"]),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese. "
                    "Use identifiers such as `character1` and `character2` to refer to the reference characters.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative prompt describing what to avoid.",
                ),
                # Growable group of video inputs; the slot names match the
                # identifiers the prompt tooltip tells users to reference.
                IO.Autogrow.Input(
                    "reference_videos",
                    template=IO.Autogrow.TemplateNames(
                        IO.Video.Input("reference_video"),
                        names=["character1", "character2", "character3"],
                        min=1,
                    ),
                ),
                # The "(WxH)" suffix of each option is parsed in execute().
                IO.Combo.Input(
                    "size",
                    options=[
                        "720p: 1:1 (960x960)",
                        "720p: 16:9 (1280x720)",
                        "720p: 9:16 (720x1280)",
                        "720p: 4:3 (1088x832)",
                        "720p: 3:4 (832x1088)",
                        "1080p: 1:1 (1440x1440)",
                        "1080p: 16:9 (1920x1080)",
                        "1080p: 9:16 (1080x1920)",
                        "1080p: 4:3 (1632x1248)",
                        "1080p: 3:4 (1248x1632)",
                    ],
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=10,
                    step=5,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input(
                    "shot_type",
                    options=["single", "multi"],
                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
                    "single continuous shot or multiple shots with cuts.",
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip="Whether to add an AI-generated watermark to the result.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        negative_prompt: str,
        reference_videos: IO.Autogrow.Type,
        size: str,
        duration: int,
        seed: int,
        shot_type: str,
        watermark: bool,
    ):
        """Create a reference-to-video task, wait for it, and return the resulting video.

        Args:
            model: Model name sent in the request.
            prompt: Text prompt.
            negative_prompt: Text describing what to avoid.
            reference_videos: Mapping of slot name to reference video.
            size: One of the schema's size options; its "(WxH)" part is parsed.
            duration: Requested duration passed through to the request.
            seed: Seed passed through to the request.
            shot_type: Shot type passed through to the request.
            watermark: Whether to request a watermark.

        Returns:
            IO.NodeOutput wrapping the downloaded result video.

        Raises:
            Exception: If the task-creation response carries no ``output``.
        """
        videos = list(reference_videos.values())
        # Validate every clip before uploading anything, so a bad input fails
        # before any upload is started. (Bounds presumably in seconds - confirm
        # against validate_video_duration.)
        for video in videos:
            validate_video_duration(video, min_duration=2, max_duration=30)
        # Uploads run one after another, in slot order.
        reference_video_urls = [await upload_video_to_comfyapi(cls, video) for video in videos]

        width, height = RES_IN_PARENS.search(size).groups()
        initial_response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", method="POST"),
            response_model=TaskCreationResponse,
            data=Reference2VideoTaskCreationRequest(
                model=model,
                input=Reference2VideoInputField(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    reference_video_urls=reference_video_urls,
                ),
                parameters=Reference2VideoParametersField(
                    # Sent as "<width>*<height>" with no padding around the
                    # numbers or the separator.
                    size=f"{width}*{height}",
                    duration=duration,
                    shot_type=shot_type,
                    watermark=watermark,
                    seed=seed,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            # The task id must end the path exactly; no trailing whitespace.
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
            response_model=VideoTaskStatusResponse,
            status_extractor=lambda x: x.output.task_status,
            poll_interval=6,
            max_poll_attempts=280,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
881+
882+
724883class WanApiExtension (ComfyExtension ):
725884 @override
726885 async def get_node_list (self ) -> list [type [IO .ComfyNode ]]:
@@ -729,6 +888,7 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
729888 WanImageToImageApi ,
730889 WanTextToVideoApi ,
731890 WanImageToVideoApi ,
891+ WanReferenceVideoApi ,
732892 ]
733893
734894
0 commit comments