Skip to content

Commit 0c1358c

Browse files
committed
update image link
1 parent 29f1007 commit 0c1358c

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

src/diffusers/models/transformers/cogvideox_transformer_3d.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,8 @@ def custom_forward(*inputs):
466466

467467
# 5. Unpatchify
468468
# Note: we use `-1` instead of `channels`:
469-
# - It is okay to use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels)
470-
# - However, for CogVideoX-5b-I2V, input image (number of input channels is twice the output channels)
469+
# - It is okay to use `channels` for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels)
470+
# - However, CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels)
471471
p = self.config.patch_size
472472
output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
473473
output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
import math
1818
from typing import Callable, Dict, List, Optional, Tuple, Union
1919

20-
import PIL
2120
import torch
2221
from transformers import T5EncoderModel, T5Tokenizer
2322

2423
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
24+
from ...image_processor import PipelineImageInput
2525
from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
2626
from ...models.embeddings import get_3d_rotary_pos_embed
2727
from ...pipelines.pipeline_utils import DiffusionPipeline
@@ -49,7 +49,9 @@
4949
>>> pipe.to("cuda")
5050
5151
>>> prompt = "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
52-
>>> image = load_image("astronaut.jpg") # TODO: Add link to 720x480 image from HF Docs repo
52+
>>> image = load_image(
53+
... "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
54+
... )
5355
>>> video = pipe(image, prompt, use_dynamic_cfg=True)
5456
>>> export_to_video(video.frames[0], "output.mp4", fps=8)
5557
```
@@ -548,7 +550,7 @@ def interrupt(self):
548550
@replace_example_docstring(EXAMPLE_DOC_STRING)
549551
def __call__(
550552
self,
551-
image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.Tensor],
553+
image: PipelineImageInput,
552554
prompt: Optional[Union[str, List[str]]] = None,
553555
negative_prompt: Optional[Union[str, List[str]]] = None,
554556
height: int = 480,
@@ -576,8 +578,8 @@ def __call__(
576578
Function invoked when calling the pipeline for generation.
577579
578580
Args:
579-
image (`List[PIL.Image.Image]`):
580-
The input video to condition the generation on. Must be a list of images/frames of the video.
581+
image (`PipelineImageInput`):
582+
The input image to condition the generation on. Must be an image, a list of images or a `torch.Tensor`.
581583
prompt (`str` or `List[str]`, *optional*):
582584
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
583585
instead.

0 commit comments

Comments
 (0)