Skip to content

Commit c9fbf25

Browse files
author
Gleb Sterkin
committed
remove redundant code, fix documentation
1 parent fa9e5eb commit c9fbf25

File tree

7 files changed

+190
-452
lines changed

7 files changed

+190
-452
lines changed

video/wan2.1/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Wan2.1 text-to-video and image-to-video implementation in MLX. The model
55
weights are downloaded directly from the [Hugging Face
66
Hub](https://huggingface.co/Wan-AI).
77

8-
| Model | Task | HF Repo | RAM (unquantized) | Single DiT step on M4 Pro chip |
8+
| Model | Task | HF Repo | RAM (unquantized), 81 frames | Single DiT step on M4 Max chip, 81 frames |
99
|-------|------|---------|-----------------|---|
1010
| 1.3B | T2V | [Wan-AI/Wan2.1-T2V-1.3B](https://huggingface.co/Wan-AI/Wan2.1-T2V-1.3B) | ~10GB | ~100 s/it |
1111
| 14B | T2V | [Wan-AI/Wan2.1-T2V-14B](https://huggingface.co/Wan-AI/Wan2.1-T2V-14B) | ~36GB | ~230 s/it |
@@ -82,7 +82,7 @@ python txt2video.py 'A cat playing piano' --quantize --output out_quantized.mp4
8282
```
8383

8484
### Disabling the cache
85-
To get additional memory savings at the expense of a bit of speed use `--no-cache` argument that will prevent MLX from utilizing the cache (sets `mx.set_cache_limit(0)` under the hood). See [documentation](https://ml-explore.github.io/mlx/build/html/python/_autosummary/mlx.core.set_cache_limit.html) for more info
85+
To get additional memory savings at the expense of a bit of speed, use the `--no-cache` argument. It will prevent MLX from utilizing the cache (sets `mx.set_cache_limit(0)` under the hood). See the [documentation](https://ml-explore.github.io/mlx/build/html/python/_autosummary/mlx.core.set_cache_limit.html) for more info.
8686
```shell
8787
python txt2video.py 'A cat playing piano' --output out.mp4 --no-cache
8888
```
@@ -107,15 +107,15 @@ python txt2video.py 'A cat playing piano' \
107107
--quantize --output out_t2v_distilled.mp4
108108
```
109109

110-
For image to video pipeline we use [4 steps distilled i2v model](https://huggingface.co/lightx2v/Wan2.1-Distill-Models/blob/main/wan2.1_i2v_480p_scaled_fp8_e4m3_lightx2v_4step.safetensors)
110+
For the image-to-video pipeline we use the [4-step distilled i2v model](https://huggingface.co/lightx2v/Wan2.1-Distill-Models/resolve/main/wan2.1_i2v_480p_lightx2v_4step.safetensors)
111111

112112
```shell
113-
wget https://huggingface.co/lightx2v/Wan2.1-Distill-Models/blob/main/wan2.1_i2v_480p_scaled_fp8_e4m3_lightx2v_4step.safetensors
113+
wget https://huggingface.co/lightx2v/Wan2.1-Distill-Models/resolve/main/wan2.1_i2v_480p_lightx2v_4step.safetensors
114114
```
115115

116116
```shell
117117
python img2video.py 'Astronaut riding a horse' \
118-
--image ./inputs/astronaut-on-a-horse.png --checkpoint ./wan2.1_i2v_480p_scaled_fp8_e4m3_lightx2v_4step.safetensors \
118+
--image ./inputs/astronaut-on-a-horse.png --checkpoint ./wan2.1_i2v_480p_lightx2v_4step.safetensors \
119119
--sampler euler --steps 4 --guidance 1.0 --shift 5.0 \
120120
--quantize --output out_i2v_distilled.mp4
121121
```

video/wan2.1/img2video.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import mlx.core as mx
99
import mlx.nn as nn
1010
from tqdm import tqdm
11-
from wan import WanI2VPipeline
11+
from wan import WanPipeline
1212
from wan.utils import save_video
1313

1414

@@ -51,6 +51,12 @@ def quantization_predicate(name, m):
5151
"--n-prompt",
5252
default="镜头晃动,色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
5353
)
54+
parser.add_argument(
55+
"--teacache",
56+
type=float,
57+
default=0.0,
58+
help="TeaCache threshold for step skipping (0=off, 0.26=recommended for i2v)",
59+
)
5460
parser.add_argument(
5561
"--checkpoint",
5662
type=str,
@@ -94,7 +100,7 @@ def quantization_predicate(name, m):
94100
logging.getLogger("wan").addHandler(handler)
95101

96102
# Load pipeline
97-
pipeline = WanI2VPipeline(args.model, checkpoint=args.checkpoint)
103+
pipeline = WanPipeline(args.model, checkpoint=args.checkpoint)
98104

99105
# Quantize DiT
100106
if args.quantize:
@@ -117,6 +123,7 @@ def quantization_predicate(name, m):
117123
guidance=args.guidance,
118124
shift=args.shift,
119125
seed=args.seed,
126+
teacache=args.teacache,
120127
verbose=args.verbose,
121128
denoising_step_list=denoising_step_list,
122129
)
@@ -129,7 +136,8 @@ def quantization_predicate(name, m):
129136

130137
# Free T5 and CLIP memory
131138
del pipeline.t5
132-
del pipeline.clip
139+
if pipeline.clip is not None:
140+
del pipeline.clip
133141
mx.clear_cache()
134142

135143
# 2. Denoising loop

video/wan2.1/txt2video.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import mlx.core as mx
99
import mlx.nn as nn
1010
from tqdm import tqdm
11-
from wan import WanT2VPipeline
11+
from wan import WanPipeline
1212
from wan.utils import save_video
1313

1414

@@ -99,7 +99,7 @@ def quantization_predicate(name, m):
9999
logging.getLogger("wan").addHandler(handler)
100100

101101
# Load pipeline
102-
pipeline = WanT2VPipeline(args.model, checkpoint=args.checkpoint)
102+
pipeline = WanPipeline(args.model, checkpoint=args.checkpoint)
103103

104104
# Quantize DiT
105105
if args.quantize:

video/wan2.1/wan/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# Copyright © 2026 Apple Inc.
22

3-
from .pipeline import WanI2VPipeline, WanT2VPipeline
3+
from .pipeline import WanPipeline

0 commit comments

Comments
 (0)