Skip to content

Commit 3db92eb

Browse files
authored
Set encoder TP size to 1 by default (hao-ai-lab#569)
1 parent 8f1c5bc commit 3db92eb

File tree

18 files changed

+27
-53
lines changed

18 files changed

+27
-53
lines changed

docs/source/inference/cli.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fastvideo generate --help
2727
### Hardware Configuration
2828

2929
- `--num-gpus {NUM_GPUS}`: Number of GPUs to use
30-
- `--tp-size {TP_SIZE}`: Tensor parallelism size (Typically should match the number of GPUs)
30+
- `--tp-size {TP_SIZE}`: Tensor parallelism size (applies only to the encoder; should not exceed 1 when text encoder offload is enabled, because layerwise offload with prefetch is faster)
3131
- `--sp-size {SP_SIZE}`: Sequence parallelism size (Typically should match the number of GPUs)
3232

3333
#### Video Configuration
@@ -68,7 +68,7 @@ Example configuration file (config.json):
6868
"output_path": "outputs/",
6969
"num_gpus": 2,
7070
"sp_size": 2,
71-
"tp_size": 2,
71+
"tp_size": 1,
7272
"num_frames": 45,
7373
"height": 720,
7474
"width": 1280,
@@ -102,7 +102,7 @@ prompt: "A beautiful woman in a red dress walking down a street"
102102
output_path: "outputs/"
103103
num_gpus: 2
104104
sp_size: 2
105-
tp_size: 2
105+
tp_size: 1
106106
num_frames: 45
107107
height: 720
108108
width: 1280

examples/training/finetune/wan_i2v_14b_480p/crush_smol/finetune_i2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ training_args=(
3030
parallel_args=(
3131
--num_gpus $NUM_GPUS
3232
--sp_size 8
33-
--tp_size 8
33+
--tp_size 1
3434
--hsdp_replicate_dim 1
3535
--hsdp_shard_dim 8
3636
)

examples/training/finetune/wan_i2v_14b_480p/crush_smol/finetune_i2v.slurm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ training_args=(
6666
parallel_args=(
6767
--num_gpus $NUM_GPUS
6868
--sp_size $NUM_GPUS
69-
--tp_size $NUM_GPUS
69+
--tp_size 1
7070
--hsdp_replicate_dim $SLURM_JOB_NUM_NODES
7171
--hsdp_shard_dim $NUM_GPUS
7272
)

examples/training/finetune/wan_t2v_1_3b/crush_smol/finetune_t2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ training_args=(
3030
parallel_args=(
3131
--num_gpus $NUM_GPUS
3232
--sp_size $NUM_GPUS
33-
--tp_size $NUM_GPUS
33+
--tp_size 1
3434
--hsdp_replicate_dim 1
3535
--hsdp_shard_dim $NUM_GPUS
3636
)

examples/training/finetune/wan_t2v_1_3b/crush_smol/finetune_t2v.slurm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ training_args=(
6363
parallel_args=(
6464
--num_gpus $NUM_GPUS
6565
--sp_size 4
66-
--tp_size 4
66+
--tp_size 1
6767
--hsdp_replicate_dim 2
6868
--hsdp_shard_dim 4
6969
)

fastvideo/v1/fastvideo_args.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def check_fastvideo_args(self) -> None:
292292
assert self.sp_size != -1, "sp_size must be set for training"
293293

294294
if self.tp_size == -1:
295-
self.tp_size = self.num_gpus
295+
self.tp_size = 1
296296
if self.sp_size == -1:
297297
self.sp_size = self.num_gpus
298298
if self.hsdp_shard_dim == -1:
@@ -305,11 +305,6 @@ def check_fastvideo_args(self) -> None:
305305
if self.num_gpus < max(self.tp_size, self.sp_size):
306306
self.num_gpus = max(self.tp_size, self.sp_size)
307307

308-
if self.tp_size != self.sp_size:
309-
raise ValueError(
310-
f"tp_size ({self.tp_size}) must be equal to sp_size ({self.sp_size})"
311-
)
312-
313308
if self.enable_torch_compile and self.num_gpus > 1:
314309
logger.warning(
315310
"Currently torch compile does not work with multi-gpu. Setting enable_torch_compile to False"

fastvideo/v1/tests/nightly/test_e2e_i2v_overfit_single_sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def run_training():
105105
"--num_latent_t", "8",
106106
"--num_gpus", NUM_GPUS_PER_NODE_TRAINING,
107107
"--sp_size", NUM_GPUS_PER_NODE_TRAINING,
108-
"--tp_size", NUM_GPUS_PER_NODE_TRAINING,
108+
"--tp_size", 1,
109109
"--hsdp_replicate_dim", "1",
110110
"--hsdp_shard_dim", NUM_GPUS_PER_NODE_TRAINING,
111111
"--num_gpus", NUM_GPUS_PER_NODE_TRAINING,

fastvideo/v1/tests/ssim/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ FastHunyuan-diffusers: {
2424
"flow_shift": 17,
2525
"seed": 1024,
2626
"sp_size": 2,
27-
"tp_size": 2,
27+
"tp_size": 1,
2828
"vae_sp": true,
2929
"fps": 24
3030
}
@@ -41,7 +41,7 @@ Wan2.1-T2V-1.3B-Diffusers: {
4141
"flow_shift": 7.0,
4242
"seed": 1024,
4343
"sp_size": 2,
44-
"tp_size": 2,
44+
"tp_size": 1,
4545
"vae_sp": True,
4646
"fps": 24,
4747
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
@@ -60,7 +60,7 @@ Wan2.1-I2V-14B-480P-Diffusers: {
6060
"flow_shift": 7.0,
6161
"seed": 1024,
6262
"sp_size": 2,
63-
"tp_size": 2,
63+
"tp_size": 1,
6464
"vae_sp": True,
6565
"fps": 24,
6666
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",

fastvideo/v1/tests/ssim/test_inference_similarity.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"flow_shift": 17,
3434
"seed": 1024,
3535
"sp_size": 2,
36-
"tp_size": 2,
36+
"tp_size": 1,
3737
"vae_sp": True,
3838
"fps": 24,
3939
}
@@ -50,7 +50,7 @@
5050
"flow_shift": 7.0,
5151
"seed": 1024,
5252
"sp_size": 2,
53-
"tp_size": 2,
53+
"tp_size": 1,
5454
"vae_sp": True,
5555
"fps": 24,
5656
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
@@ -69,7 +69,7 @@
6969
"flow_shift": 7.0,
7070
"seed": 1024,
7171
"sp_size": 2,
72-
"tp_size": 2,
72+
"tp_size": 1,
7373
"vae_sp": True,
7474
"fps": 24,
7575
"neg_prompt": "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards",
@@ -238,7 +238,7 @@ def test_i2v_inference_similarity(prompt, ATTENTION_BACKEND, model_id):
238238
logger.error("Failed to write SSIM results to file")
239239

240240
min_acceptable_ssim = 0.97
241-
assert mean_ssim >= min_acceptable_ssim, f"SSIM value {mean_ssim} is below threshold {min_acceptable_ssim}"
241+
assert mean_ssim >= min_acceptable_ssim, f"SSIM value {mean_ssim} is below threshold {min_acceptable_ssim} for {model_id} with backend {ATTENTION_BACKEND}"
242242

243243
@pytest.mark.parametrize("prompt", TEST_PROMPTS)
244244
@pytest.mark.parametrize("ATTENTION_BACKEND", ["FLASH_ATTN", "TORCH_SDPA"])
@@ -337,5 +337,5 @@ def test_inference_similarity(prompt, ATTENTION_BACKEND, model_id):
337337
if not success:
338338
logger.error("Failed to write SSIM results to file")
339339

340-
min_acceptable_ssim = 0.95
341-
assert mean_ssim >= min_acceptable_ssim, f"SSIM value {mean_ssim} is below threshold {min_acceptable_ssim}"
340+
min_acceptable_ssim = 0.93
341+
assert mean_ssim >= min_acceptable_ssim, f"SSIM value {mean_ssim} is below threshold {min_acceptable_ssim} for {model_id} with backend {ATTENTION_BACKEND}"

fastvideo/v1/tests/training/Vanilla/test_training_loss.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def run_worker():
4343
"--num_latent_t", "4",
4444
"--num_gpus", "4",
4545
"--sp_size", "4",
46-
"--tp_size", "4",
46+
"--tp_size", "1",
4747
"--hsdp_replicate_dim", "1",
4848
"--hsdp_shard_dim", "4",
4949
"--train_sp_batch_size", "1",

0 commit comments

Comments
 (0)