Skip to content

Commit bb2cb13

Browse files
add width and height
1 parent 2151a3b commit bb2cb13

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

inference/cli_demo.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ def generate_video(
3737
lora_path: str = None,
3838
lora_rank: int = 128,
3939
num_frames: int = 81,
40+
width: int = 1360,
41+
height: int = 768,
4042
output_path: str = "./output.mp4",
4143
image_or_video_path: str = "",
4244
num_inference_steps: int = 50,
@@ -58,6 +60,8 @@ def generate_video(
5860
- output_path (str): The path where the generated video will be saved.
5961
- num_inference_steps (int): Number of steps for the inference process. More steps can result in better quality.
6062
- num_frames (int): Number of frames to generate. CogVideoX1.0 generates 49 frames for 6 seconds at 8 fps, while CogVideoX1.5 produces either 81 or 161 frames, corresponding to 5 seconds or 10 seconds at 16 fps.
63+
- width (int): The width of the generated video, applicable only for CogVideoX1.5-5B-I2V
64+
- height (int): The height of the generated video, applicable only for CogVideoX1.5-5B-I2V
6165
- guidance_scale (float): The scale for classifier-free guidance. Higher values can lead to better alignment with the prompt.
6266
- num_videos_per_prompt (int): Number of videos to generate per prompt.
6367
- dtype (torch.dtype): The data type for computation (default is torch.bfloat16).
@@ -111,8 +115,11 @@ def generate_video(
111115
# This is the default value for a 6-second video at 8 fps: 48 frames plus 1 for the first frame, giving 49 frames.
112116
if generate_type == "i2v":
113117
video_generate = pipe(
118+
height=height,
119+
width=width,
114120
prompt=prompt,
115-
image=image, # The path of the image, the resolution of video will be the same as the image for CogVideoX1.5-5B-I2V, otherwise it will be 720 * 480
121+
image=image,
122+
# The path of the image, the resolution of video will be the same as the image for CogVideoX1.5-5B-I2V, otherwise it will be 720 * 480
116123
num_videos_per_prompt=num_videos_per_prompt, # Number of videos to generate per prompt
117124
num_inference_steps=num_inference_steps, # Number of inference steps
118125
num_frames=num_frames, # Number of frames to generate
@@ -162,6 +169,8 @@ def generate_video(
162169
parser.add_argument("--guidance_scale", type=float, default=6.0, help="The scale for classifier-free guidance")
163170
parser.add_argument("--num_inference_steps", type=int, default=50, help="Inference steps")
164171
parser.add_argument("--num_frames", type=int, default=81, help="Number of frames to generate")
172+
parser.add_argument("--width", type=int, default=1360, help="The width of the generated video, applicable only for CogVideoX1.5-5B-I2V")
173+
parser.add_argument("--height", type=int, default=768, help="The height of the generated video, applicable only for CogVideoX1.5-5B-I2V")
165174
parser.add_argument("--fps", type=int, default=16, help="The frames per second for the generated video")
166175
parser.add_argument("--num_videos_per_prompt", type=int, default=1, help="Number of videos to generate per prompt")
167176
parser.add_argument("--generate_type", type=str, default="t2v", help="The type of video generation")
@@ -177,6 +186,8 @@ def generate_video(
177186
lora_rank=args.lora_rank,
178187
output_path=args.output_path,
179188
num_frames=args.num_frames,
189+
width=args.width,
190+
height=args.height,
180191
image_or_video_path=args.image_or_video_path,
181192
num_inference_steps=args.num_inference_steps,
182193
guidance_scale=args.guidance_scale,

0 commit comments

Comments
 (0)