|
11 | 11 | import json |
12 | 12 | import numpy as np |
13 | 13 | import torch |
| 14 | +import cv2 |
14 | 15 | from safetensors import safe_open |
15 | 16 | from PIL import Image |
16 | 17 | from transformers import ( |
|
35 | 36 | from ltx_video.schedulers.rf import RectifiedFlowScheduler |
36 | 37 | from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy |
37 | 38 | from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler |
| 39 | +import ltx_video.pipelines.crf_compressor as crf_compressor |
38 | 40 |
|
39 | 41 | MAX_HEIGHT = 720 |
40 | 42 | MAX_WIDTH = 1280 |
@@ -96,7 +98,12 @@ def load_image_to_tensor_with_resize_and_crop( |
96 | 98 | image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height)) |
97 | 99 | if not just_crop: |
98 | 100 | image = image.resize((target_width, target_height)) |
99 | | - frame_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float() |
| 101 | + |
| 102 | + image = np.array(image) |
| 103 | + image = cv2.GaussianBlur(image, (3, 3), 0) |
| 104 | + frame_tensor = torch.from_numpy(image).float() |
| 105 | + frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0 |
| 106 | + frame_tensor = frame_tensor.permute(2, 0, 1) |
100 | 107 | frame_tensor = (frame_tensor / 127.5) - 1.0 |
101 | 108 | # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width) |
102 | 109 | return frame_tensor.unsqueeze(0).unsqueeze(2) |
@@ -266,13 +273,6 @@ def main(): |
266 | 273 | help="Path to the input video (or imaage) to be modified using the video-to-video pipeline", |
267 | 274 | ) |
268 | 275 |
|
269 | | - parser.add_argument( |
270 | | - "--strength", |
271 | | - type=float, |
272 | | - default=1.0, |
273 | | - help="Editing strength (noising level) for video-to-video pipeline.", |
274 | | - ) |
275 | | - |
276 | 276 | # Conditioning arguments |
277 | 277 | parser.add_argument( |
278 | 278 | "--conditioning_media_paths", |
@@ -407,7 +407,6 @@ def infer( |
407 | 407 | negative_prompt: str, |
408 | 408 | offload_to_cpu: bool, |
409 | 409 | input_media_path: Optional[str] = None, |
410 | | - strength: Optional[float] = 1.0, |
411 | 410 | conditioning_media_paths: Optional[List[str]] = None, |
412 | 411 | conditioning_strengths: Optional[List[float]] = None, |
413 | 412 | conditioning_start_frames: Optional[List[int]] = None, |
@@ -614,7 +613,6 @@ def infer( |
614 | 613 | frame_rate=frame_rate, |
615 | 614 | **sample, |
616 | 615 | media_items=media_item, |
617 | | - strength=strength, |
618 | 616 | conditioning_items=conditioning_items, |
619 | 617 | is_video=True, |
620 | 618 | vae_per_channel_normalize=True, |
|
0 commit comments