|
17 | 17 | import math |
18 | 18 | from typing import Callable, Dict, List, Optional, Tuple, Union |
19 | 19 |
|
20 | | -import PIL |
21 | 20 | import torch |
22 | 21 | from transformers import T5EncoderModel, T5Tokenizer |
23 | 22 |
|
24 | 23 | from ...callbacks import MultiPipelineCallbacks, PipelineCallback |
| 24 | +from ...image_processor import PipelineImageInput |
25 | 25 | from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel |
26 | 26 | from ...models.embeddings import get_3d_rotary_pos_embed |
27 | 27 | from ...pipelines.pipeline_utils import DiffusionPipeline |
|
49 | 49 | >>> pipe.to("cuda") |
50 | 50 |
|
51 | 51 | >>> prompt = "An astronaut hatching from an egg, on the surface of the moon, the darkness and depth of space realised in the background. High quality, ultrarealistic detail and breath-taking movie-like camera shot." |
52 | | - >>> image = load_image("astronaut.jpg") # TODO: Add link to 720x480 image from HF Docs repo |
| 52 | + >>> image = load_image( |
| 53 | + ... "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg" |
| 54 | + ... ) |
53 | 55 | >>> video = pipe(image, prompt, use_dynamic_cfg=True) |
54 | 56 | >>> export_to_video(video.frames[0], "output.mp4", fps=8) |
55 | 57 | ``` |
@@ -548,7 +550,7 @@ def interrupt(self): |
548 | 550 | @replace_example_docstring(EXAMPLE_DOC_STRING) |
549 | 551 | def __call__( |
550 | 552 | self, |
551 | | - image: Union[PIL.Image.Image, List[PIL.Image.Image], torch.Tensor], |
| 553 | + image: PipelineImageInput, |
552 | 554 | prompt: Optional[Union[str, List[str]]] = None, |
553 | 555 | negative_prompt: Optional[Union[str, List[str]]] = None, |
554 | 556 | height: int = 480, |
@@ -576,8 +578,8 @@ def __call__( |
576 | 578 | Function invoked when calling the pipeline for generation. |
577 | 579 |
|
578 | 580 | Args: |
579 | | - image (`List[PIL.Image.Image]`): |
580 | | - The input video to condition the generation on. Must be a list of images/frames of the video. |
| 581 | + image (`PipelineImageInput`): |
| 582 | + The input video to condition the generation on. Must be an image, a list of images or a `torch.Tensor`. |
581 | 583 | prompt (`str` or `List[str]`, *optional*): |
582 | 584 | The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds` |
583 | 585 | instead. |
|
0 commit comments