
Commit 409b586

Merge branch 'main' into ltxv-0.9.1-integration

2 parents: 167df2c + f615f00

5 files changed (+13 / -13 lines)

docs/source/en/api/models/autoencoder_kl_hunyuan_video.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import AutoencoderKLHunyuanVideo
 
-vae = AutoencoderKLHunyuanVideo.from_pretrained("tencent/HunyuanVideo", torch_dtype=torch.float16)
+vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16)
 ```
 
 ## AutoencoderKLHunyuanVideo
````
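The docs now point at the community mirror, which ships the checkpoint in the multi-folder `diffusers` layout, so the component has to be selected with `subfolder="vae"`. A minimal loading sketch; the `enable_tiling()` call assumes this VAE exposes the usual tiled-decode toggle of the `AutoencoderKL` family:

```python
import torch
from diffusers import AutoencoderKLHunyuanVideo

vae = AutoencoderKLHunyuanVideo.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16
)
# Tiled decoding trades some speed for a much lower peak-memory
# footprint when decoding long or high-resolution videos.
vae.enable_tiling()
```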

docs/source/en/api/models/hunyuan_video_transformer_3d.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -18,7 +18,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import HunyuanVideoTransformer3DModel
 
-transformer = HunyuanVideoTransformer3DModel.from_pretrained("tencent/HunyuanVideo", torch_dtype=torch.bfloat16)
+transformer = HunyuanVideoTransformer3DModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="transformer", torch_dtype=torch.bfloat16)
 ```
 
 ## HunyuanVideoTransformer3DModel
````
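Same repo switch for the transformer, loaded in `torch.bfloat16` as the pipeline docs recommend. A hedged sketch of wiring the separately loaded component into the pipeline, mirroring the docstring example changed in `pipeline_hunyuan_video.py` below:

```python
import torch
from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel

model_id = "hunyuanvideo-community/HunyuanVideo"
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    model_id, subfolder="transformer", torch_dtype=torch.bfloat16
)
# Remaining components load in float16, matching the bf16-transformer /
# fp16-VAE recommendation from the pipeline documentation.
pipe = HunyuanVideoPipeline.from_pretrained(
    model_id, transformer=transformer, torch_dtype=torch.float16
)
```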

docs/source/en/api/pipelines/hunyuan_video.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -29,7 +29,7 @@ Recommendations for inference:
 - Transformer should be in `torch.bfloat16`.
 - VAE should be in `torch.float16`.
 - `num_frames` should be of the form `4 * k + 1`, for example `49` or `129`.
-- For smaller resolution images, try lower values of `shift` (between `2.0` to `5.0`) in the [Scheduler](https://huggingface.co/docs/diffusers/main/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler.shift). For larger resolution images, try higher values (between `7.0` and `12.0`). The default value is `7.0` for HunyuanVideo.
+- For smaller resolution videos, try lower values of `shift` (between `2.0` to `5.0`) in the [Scheduler](https://huggingface.co/docs/diffusers/main/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler.shift). For larger resolution images, try higher values (between `7.0` and `12.0`). The default value is `7.0` for HunyuanVideo.
 - For more information about supported resolutions and other details, please refer to the original repository [here](https://github.com/Tencent/HunyuanVideo/).
 
 ## HunyuanVideoPipeline
```
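The corrected bullet concerns the `shift` value of `FlowMatchEulerDiscreteScheduler`. A sketch of overriding it for low-resolution generation; the `3.0` is illustrative, picked from the `2.0`-`5.0` range the docs suggest:

```python
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, HunyuanVideoPipeline

pipe = HunyuanVideoPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo", torch_dtype=torch.float16
)
# Rebuild the scheduler from its config with a lower shift;
# the HunyuanVideo default is 7.0 per the docs above.
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config, shift=3.0
)
```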

src/diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -39,7 +39,7 @@
         >>> from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel
         >>> from diffusers.utils import export_to_video
 
-        >>> model_id = "tencent/HunyuanVideo"
+        >>> model_id = "hunyuanvideo-community/HunyuanVideo"
         >>> transformer = HunyuanVideoTransformer3DModel.from_pretrained(
         ...     model_id, subfolder="transformer", torch_dtype=torch.bfloat16
         ... )
```
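The hunk shows only the top of the docstring example. A hedged sketch of how such an example typically continues, reusing the `model_id` and `transformer` defined in the hunk context above; the prompt, step count, and fps are illustrative, not taken from the diff, and `num_frames=61` follows the `4 * k + 1` rule from the pipeline docs:

```python
>>> pipe = HunyuanVideoPipeline.from_pretrained(
...     model_id, transformer=transformer, torch_dtype=torch.float16
... )
>>> pipe.vae.enable_tiling()  # keep VAE decode memory in check
>>> pipe.to("cuda")

>>> output = pipe(
...     prompt="A cat walks on the grass, realistic style.",
...     num_frames=61,
...     num_inference_steps=30,
... ).frames[0]
>>> export_to_video(output, "output.mp4", fps=15)
```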

src/diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py

Lines changed: 9 additions & 9 deletions

```diff
@@ -193,15 +193,15 @@ def __init__(
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
         Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
         GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
         Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
@@ -411,16 +411,16 @@ def __init__(
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
         `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
@@ -652,16 +652,16 @@ def __init__(
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-    def enable_sequential_cpu_offload(self, gpu_id=0):
+    def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
         text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
         `torch.device('meta') and loaded to GPU only when their specific submodule has its `forward` method called.
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
-        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id, device=device)
 
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
```
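The new signature matches `DiffusionPipeline.enable_sequential_cpu_offload`, which takes an optional `gpu_id` plus an explicit `device`, so the combined Kandinsky pipelines now forward both to their prior and decoder sub-pipelines instead of assuming CUDA device 0. A hedged usage sketch; the repo id is assumed to be the usual community checkpoint, loaded here via `AutoPipelineForText2Image`, which resolves to the combined pipeline:

```python
import torch
from diffusers import AutoPipelineForText2Image

# Resolves to KandinskyCombinedPipeline for this checkpoint.
pipe = AutoPipelineForText2Image.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)

# Old call sites (gpu_id=0) keep working; the explicit `device`
# argument now propagates to both prior and decoder pipelines.
pipe.enable_sequential_cpu_offload(device="cuda")

image = pipe(prompt="A starry night over a mountain lake").images[0]
```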
