fix image size compatibility (#16)

Howe2018 · web-flow · commit aa2598afb93f · 2025-04-22T12:13:42.000+08:00
diff --git a/generate_video.py b/generate_video.py
@@ -11,6 +11,7 @@
 from skyreels_v2_infer.modules import download_model
 from skyreels_v2_infer.pipelines import Image2VideoPipeline
 from skyreels_v2_infer.pipelines import PromptEnhancer
+from skyreels_v2_infer.pipelines import resizecrop
 from skyreels_v2_infer.pipelines import Text2VideoPipeline
 
 MODEL_ID_CONFIG = {
@@ -109,6 +110,11 @@
         pipe = Image2VideoPipeline(
             model_path=args.model_id, dit_path=args.model_id, use_usp=args.use_usp, offload=args.offload
         )
+        args.image = load_image(args.image)
+        image_width, image_height = args.image.size
+        if image_height > image_width:
+            height, width = width, height
+        args.image = resizecrop(args.image, height, width)
 
     prompt_input = args.prompt
     if args.prompt_enhancer and image is not None:
@@ -128,7 +134,7 @@
     }
 
     if image is not None:
-        kwargs["image"] = load_image(args.image).convert("RGB")
+        kwargs["image"] = args.image.convert("RGB")
 
     save_dir = os.path.join("result", args.outdir)
     os.makedirs(save_dir, exist_ok=True)
diff --git a/skyreels_v2_infer/pipelines/__init__.py b/skyreels_v2_infer/pipelines/__init__.py
@@ -1,4 +1,5 @@
 from .diffusion_forcing_pipeline import DiffusionForcingPipeline
-from .text2video_pipeline import Text2VideoPipeline
 from .image2video_pipeline import Image2VideoPipeline
+from .image2video_pipeline import resizecrop
 from .prompt_enhancer import PromptEnhancer
+from .text2video_pipeline import Text2VideoPipeline
diff --git a/skyreels_v2_infer/pipelines/image2video_pipeline.py b/skyreels_v2_infer/pipelines/image2video_pipeline.py
@@ -7,6 +7,7 @@
 import torch
 from diffusers.image_processor import PipelineImageInput
 from diffusers.video_processor import VideoProcessor
+from PIL import Image
 from tqdm import tqdm
 
 from ..modules import get_image_encoder
@@ -16,6 +17,24 @@
 from ..scheduler.fm_solvers_unipc import FlowUniPCMultistepScheduler
 
 
+def resizecrop(image: Image.Image, th, tw):
+    """Center-crop ``image`` to the th:tw (height:width) aspect ratio; pixels are never resampled."""
+    w, h = image.size
+    if w == tw and h == th:
+        return image
+    if h / w > th / tw:
+        # Source is taller than the target ratio: keep the full width, trim rows.
+        new_w, new_h = w, int(w * th / tw)
+    else:
+        # Source is wider than (or equal to) the target ratio: keep the full height, trim columns.
+        new_w, new_h = int(h * tw / th), h
+    # Integer offsets keep the box exactly new_w x new_h; float edges ending in .5 are
+    # rounded independently by the crop call and can leave the result one pixel off.
+    left = (w - new_w) // 2
+    top = (h - new_h) // 2
+    return image.crop((left, top, left + new_w, top + new_h))
+
+
 class Image2VideoPipeline:
     def __init__(
         self, model_path, dit_path, device: str = "cuda", weight_dtype=torch.bfloat16, use_usp=False, offload=False