Skip to content

Commit e6d9f62

Browse files
Add Moonvalley Marey V2V node with updated input validation (#9069)
* [moonvalley] Update V2V node to match API specification
  - Add exact resolution validation for supported resolutions (1920x1080, 1080x1920, 1152x1152, 1536x1152, 1152x1536)
  - Change frame count validation from divisible by 32 to divisible by 16
  - Add MP4 container format validation
  - Remove internal parameters (steps, guidance_scale) from V2V inference params
  - Update video duration handling to support only 5 seconds (auto-trim if longer)
  - Add motion_intensity parameter (0-100) for Motion Transfer control type
  - Add get_container_format() method to VideoInput classes
* update negative prompt
1 parent 78672d0 commit e6d9f62

File tree

3 files changed

+145
-105
lines changed

3 files changed

+145
-105
lines changed

comfy_api/input/video_types.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from abc import ABC, abstractmethod
33
from typing import Optional, Union
44
import io
5+
import av
56
from comfy_api.util import VideoContainer, VideoCodec, VideoComponents
67

78
class VideoInput(ABC):
@@ -70,3 +71,15 @@ def get_duration(self) -> float:
7071
components = self.get_components()
7172
frame_count = components.images.shape[0]
7273
return float(frame_count / components.frame_rate)
74+
75+
def get_container_format(self) -> str:
    """
    Report the name of the container format backing this video.

    This is the generic fallback: the object returned by
    ``get_stream_source()`` is opened with PyAV in read mode purely to
    read the format name, so subclasses that can answer more cheaply
    should override it.

    NOTE(review): PyAV reports the demuxer's name, which may be a
    comma-separated alias list rather than a bare file extension —
    confirm before comparing against a single literal.

    Returns:
        The format name reported by the opened container.
    """
    stream_source = self.get_stream_source()
    with av.open(stream_source, mode="r") as opened:
        format_name = opened.format.name
    return format_name

comfy_api/input_impl/video_types.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,18 @@ def get_duration(self) -> float:
121121

122122
raise ValueError(f"Could not determine duration for file '{self.__file}'")
123123

124+
def get_container_format(self) -> str:
    """
    Report the name of the container format of the wrapped file.

    The wrapped file object is opened with PyAV in read mode and the
    name of the detected container format is returned.

    Returns:
        The format name reported by the opened container.
    """
    target = self.__file
    if isinstance(target, io.BytesIO):
        # In-memory buffers are rewound so PyAV reads from the first byte.
        target.seek(0)
    with av.open(target, mode='r') as opened:
        return opened.format.name
135+
124136
def get_components_internal(self, container: InputContainer) -> VideoComponents:
125137
# Get video frames
126138
frames = []

comfy_api_nodes/nodes_moonvalley.py

Lines changed: 120 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from comfy_api_nodes.util.validation_utils import (
66
get_image_dimensions,
77
validate_image_dimensions,
8-
validate_video_dimensions,
98
)
109

1110

@@ -176,54 +175,76 @@ def validate_input_image(
176175
)
177176

178177

179-
def validate_input_video(
180-
video: VideoInput, num_frames_out: int, with_frame_conditioning: bool = False
181-
):
178+
def validate_video_to_video_input(video: VideoInput) -> VideoInput:
    """
    Run every Moonvalley Video-to-Video precondition against ``video``.

    The checks run in a fixed order — frame dimensions, then container
    format, then duration — so the first failing check determines the
    error the caller sees.

    Args:
        video: Input video to validate.

    Returns:
        The video returned by the duration step: the input itself, or a
        trimmed copy when the input was longer than allowed.

    Raises:
        ValueError: If the dimensions cannot be read, the resolution is
            not supported, or the container format is rejected.
        MoonvalleyApiError: If the video is too short.
    """
    frame_width, frame_height = _get_video_dimensions(video)
    _validate_video_dimensions(frame_width, frame_height)
    _validate_container_format(video)

    checked_video = _validate_and_trim_duration(video)
    return checked_video
197+
198+
199+
def _get_video_dimensions(video: VideoInput) -> tuple[int, int]:
200+
"""Extracts video dimensions with error handling."""
182201
try:
183-
width, height = video.get_dimensions()
202+
return video.get_dimensions()
184203
except Exception as e:
185204
logging.error("Error getting dimensions of video: %s", e)
186205
raise ValueError(f"Cannot get video dimensions: {e}") from e
187206

188-
validate_input_media(width, height, with_frame_conditioning)
189-
validate_video_dimensions(
190-
video,
191-
min_width=MIN_VID_WIDTH,
192-
min_height=MIN_VID_HEIGHT,
193-
max_width=MAX_VID_WIDTH,
194-
max_height=MAX_VID_HEIGHT,
195-
)
196207

197-
trimmed_video = validate_input_video_length(video, num_frames_out)
198-
return trimmed_video
208+
def _validate_video_dimensions(width: int, height: int) -> None:
209+
"""Validates video dimensions meet Moonvalley V2V requirements."""
210+
supported_resolutions = {
211+
(1920, 1080), (1080, 1920), (1152, 1152),
212+
(1536, 1152), (1152, 1536)
213+
}
199214

215+
if (width, height) not in supported_resolutions:
216+
supported_list = ', '.join([f'{w}x{h}' for w, h in sorted(supported_resolutions)])
217+
raise ValueError(f"Resolution {width}x{height} not supported. Supported: {supported_list}")
200218

201-
def validate_input_video_length(video: VideoInput, num_frames: int):
202219

203-
if video.get_duration() > 60:
204-
raise MoonvalleyApiError(
205-
"Input Video lenth should be less than 1min. Please trim."
206-
)
220+
def _validate_container_format(video: VideoInput) -> None:
221+
"""Validates video container format is MP4."""
222+
container_format = video.get_container_format()
223+
if container_format not in ['mp4', 'mov,mp4,m4a,3gp,3g2,mj2']:
224+
raise ValueError(f"Only MP4 container format supported. Got: {container_format}")
207225

208-
if num_frames == 128:
209-
if video.get_duration() < 5:
210-
raise MoonvalleyApiError(
211-
"Input Video length is less than 5s. Please use a video longer than or equal to 5s."
212-
)
213-
if video.get_duration() > 5:
214-
# trim video to 5s
215-
video = trim_video(video, 5)
216-
if num_frames == 256:
217-
if video.get_duration() < 10:
218-
raise MoonvalleyApiError(
219-
"Input Video length is less than 10s. Please use a video longer than or equal to 10s."
220-
)
221-
if video.get_duration() > 10:
222-
# trim video to 10s
223-
video = trim_video(video, 10)
226+
227+
def _validate_and_trim_duration(video: VideoInput) -> VideoInput:
    """Enforce the 5-second duration rule, trimming longer videos.

    The duration is read once from ``video.get_duration()`` and reused for
    both the minimum-length check and the trim decision.

    Args:
        video: Video to check.

    Returns:
        The input video, or a trimmed replacement when it was too long.
    """
    length_sec = video.get_duration()
    _validate_minimum_duration(length_sec)
    trimmed = _trim_if_too_long(video, length_sec)
    return trimmed
232+
233+
234+
def _validate_minimum_duration(duration: float) -> None:
235+
"""Ensures video is at least 5 seconds long."""
236+
if duration < 5:
237+
raise MoonvalleyApiError("Input video must be at least 5 seconds long.")
238+
239+
240+
def _trim_if_too_long(video: VideoInput, duration: float) -> VideoInput:
241+
"""Trims video to 5 seconds if longer."""
242+
if duration > 5:
243+
return trim_video(video, 5)
224244
return video
225245

226246

247+
227248
def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
228249
"""
229250
Returns a new VideoInput object trimmed from the beginning to the specified duration,
@@ -278,15 +299,13 @@ def trim_video(video: VideoInput, duration_sec: float) -> VideoInput:
278299
f"Added audio stream: {stream.sample_rate}Hz, {stream.channels} channels"
279300
)
280301

281-
# Calculate target frame count that's divisible by 32
302+
# Calculate target frame count that's divisible by 16
282303
fps = input_container.streams.video[0].average_rate
283304
estimated_frames = int(duration_sec * fps)
284-
target_frames = (
285-
estimated_frames // 32
286-
) * 32 # Round down to nearest multiple of 32
305+
target_frames = (estimated_frames // 16) * 16 # Round down to nearest multiple of 16
287306

288307
if target_frames == 0:
289-
raise ValueError("Video too short: need at least 32 frames for Moonvalley")
308+
raise ValueError("Video too short: need at least 16 frames for Moonvalley")
290309

291310
frame_count = 0
292311
audio_frame_count = 0
@@ -353,8 +372,8 @@ def parseWidthHeightFromRes(self, resolution: str):
353372
"16:9 (1920 x 1080)": {"width": 1920, "height": 1080},
354373
"9:16 (1080 x 1920)": {"width": 1080, "height": 1920},
355374
"1:1 (1152 x 1152)": {"width": 1152, "height": 1152},
356-
"4:3 (1440 x 1080)": {"width": 1440, "height": 1080},
357-
"3:4 (1080 x 1440)": {"width": 1080, "height": 1440},
375+
"4:3 (1536 x 1152)": {"width": 1536, "height": 1152},
376+
"3:4 (1152 x 1536)": {"width": 1152, "height": 1536},
358377
"21:9 (2560 x 1080)": {"width": 2560, "height": 1080},
359378
}
360379
if resolution in res_map:
@@ -494,7 +513,6 @@ def generate(
494513
image = kwargs.get("image", None)
495514
if image is None:
496515
raise MoonvalleyApiError("image is required")
497-
total_frames = get_total_frames_from_length()
498516

499517
validate_input_image(image, True)
500518
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
@@ -505,7 +523,7 @@ def generate(
505523
steps=kwargs.get("steps"),
506524
seed=kwargs.get("seed"),
507525
guidance_scale=kwargs.get("prompt_adherence"),
508-
num_frames=total_frames,
526+
num_frames=128,
509527
width=width_height.get("width"),
510528
height=width_height.get("height"),
511529
use_negative_prompts=True,
@@ -549,68 +567,76 @@ def __init__(self):
549567

550568
@classmethod
551569
def INPUT_TYPES(cls):
552-
input_types = super().INPUT_TYPES()
553-
for param in ["resolution", "image"]:
554-
if param in input_types["required"]:
555-
del input_types["required"][param]
556-
if param in input_types["optional"]:
557-
del input_types["optional"][param]
558-
input_types["optional"] = {
559-
"video": (
560-
IO.VIDEO,
561-
{
562-
"default": "",
563-
"multiline": False,
564-
"tooltip": "The reference video used to generate the output video. Input a 5s video for 128 frames and a 10s video for 256 frames. Longer videos will be trimmed automatically.",
565-
},
566-
),
567-
"control_type": (
568-
["Motion Transfer", "Pose Transfer"],
569-
{"default": "Motion Transfer"},
570-
),
571-
"motion_intensity": (
572-
"INT",
573-
{
574-
"default": 100,
575-
"step": 1,
576-
"min": 0,
577-
"max": 100,
578-
"tooltip": "Only used if control_type is 'Motion Transfer'",
579-
},
580-
),
570+
return {
571+
"required": {
572+
"prompt": model_field_to_node_input(
573+
IO.STRING, MoonvalleyVideoToVideoRequest, "prompt_text",
574+
multiline=True
575+
),
576+
"negative_prompt": model_field_to_node_input(
577+
IO.STRING,
578+
MoonvalleyVideoToVideoInferenceParams,
579+
"negative_prompt",
580+
multiline=True,
581+
default="low-poly, flat shader, bad rigging, stiff animation, uncanny eyes, low-quality textures, looping glitch, cheap effect, overbloom, bloom spam, default lighting, game asset, stiff face, ugly specular, AI artifacts"
582+
),
583+
"seed": model_field_to_node_input(IO.INT,MoonvalleyVideoToVideoInferenceParams, "seed", default=random.randint(0, 2**32 - 1), min=0, max=4294967295, step=1, display="number", tooltip="Random seed value", control_after_generate=True),
584+
},
585+
"hidden": {
586+
"auth_token": "AUTH_TOKEN_COMFY_ORG",
587+
"comfy_api_key": "API_KEY_COMFY_ORG",
588+
"unique_id": "UNIQUE_ID",
589+
},
590+
"optional": {
591+
"video": (IO.VIDEO, {"default": "", "multiline": False, "tooltip": "The reference video used to generate the output video. Must be at least 5 seconds long. Videos longer than 5s will be automatically trimmed. Only MP4 format supported."}),
592+
"control_type": (
593+
["Motion Transfer", "Pose Transfer"],
594+
{"default": "Motion Transfer"},
595+
),
596+
"motion_intensity": (
597+
"INT",
598+
{
599+
"default": 100,
600+
"step": 1,
601+
"min": 0,
602+
"max": 100,
603+
"tooltip": "Only used if control_type is 'Motion Transfer'",
604+
},
605+
)
606+
}
581607
}
582608

583-
return input_types
584-
585609
RETURN_TYPES = ("VIDEO",)
586610
RETURN_NAMES = ("video",)
587611

588612
def generate(
589613
self, prompt, negative_prompt, unique_id: Optional[str] = None, **kwargs
590614
):
591615
video = kwargs.get("video")
592-
num_frames = get_total_frames_from_length()
593616

594617
if not video:
595618
raise MoonvalleyApiError("video is required")
596619

597-
"""Validate video input"""
598620
video_url = ""
599621
if video:
600-
validated_video = validate_input_video(video, num_frames, False)
622+
validated_video = validate_video_to_video_input(video)
601623
video_url = upload_video_to_comfyapi(validated_video, auth_kwargs=kwargs)
602624

603625
control_type = kwargs.get("control_type")
604626
motion_intensity = kwargs.get("motion_intensity")
605627

606628
"""Validate prompts and inference input"""
607629
validate_prompts(prompt, negative_prompt)
608-
inference_params = MoonvalleyVideoToVideoInferenceParams(
630+
631+
# Only include motion_intensity for Motion Transfer
632+
control_params = {}
633+
if control_type == "Motion Transfer" and motion_intensity is not None:
634+
control_params['motion_intensity'] = motion_intensity
635+
636+
inference_params=MoonvalleyVideoToVideoInferenceParams(
609637
negative_prompt=negative_prompt,
610-
steps=kwargs.get("steps"),
611638
seed=kwargs.get("seed"),
612-
guidance_scale=kwargs.get("prompt_adherence"),
613-
control_params={"motion_intensity": motion_intensity},
639+
control_params=control_params
614640
)
615641

616642
control = self.parseControlParameter(control_type)
@@ -667,17 +693,16 @@ def generate(
667693
):
668694
validate_prompts(prompt, negative_prompt, MOONVALLEY_MAREY_MAX_PROMPT_LENGTH)
669695
width_height = self.parseWidthHeightFromRes(kwargs.get("resolution"))
670-
num_frames = get_total_frames_from_length()
671696

672-
inference_params = MoonvalleyTextToVideoInferenceParams(
673-
negative_prompt=negative_prompt,
674-
steps=kwargs.get("steps"),
675-
seed=kwargs.get("seed"),
676-
guidance_scale=kwargs.get("prompt_adherence"),
677-
num_frames=num_frames,
678-
width=width_height.get("width"),
679-
height=width_height.get("height"),
680-
)
697+
inference_params=MoonvalleyTextToVideoInferenceParams(
698+
negative_prompt=negative_prompt,
699+
steps=kwargs.get("steps"),
700+
seed=kwargs.get("seed"),
701+
guidance_scale=kwargs.get("prompt_adherence"),
702+
num_frames=128,
703+
width=width_height.get("width"),
704+
height=width_height.get("height"),
705+
)
681706
request = MoonvalleyTextToVideoRequest(
682707
prompt_text=prompt, inference_params=inference_params
683708
)
@@ -707,22 +732,12 @@ def generate(
707732
NODE_CLASS_MAPPINGS = {
708733
"MoonvalleyImg2VideoNode": MoonvalleyImg2VideoNode,
709734
"MoonvalleyTxt2VideoNode": MoonvalleyTxt2VideoNode,
710-
# "MoonvalleyVideo2VideoNode": MoonvalleyVideo2VideoNode,
735+
"MoonvalleyVideo2VideoNode": MoonvalleyVideo2VideoNode,
711736
}
712737

713738

714739
NODE_DISPLAY_NAME_MAPPINGS = {
715740
"MoonvalleyImg2VideoNode": "Moonvalley Marey Image to Video",
716741
"MoonvalleyTxt2VideoNode": "Moonvalley Marey Text to Video",
717-
# "MoonvalleyVideo2VideoNode": "Moonvalley Marey Video to Video",
742+
"MoonvalleyVideo2VideoNode": "Moonvalley Marey Video to Video",
718743
}
719-
720-
721-
def get_total_frames_from_length(length="5s"):
722-
# if length == '5s':
723-
# return 128
724-
# elif length == '10s':
725-
# return 256
726-
return 128
727-
# else:
728-
# raise MoonvalleyApiError("length is required")

0 commit comments

Comments
 (0)