Skip to content

Commit 93e8e2b

Browse files
committed
hunyuan video data preprocess
1 parent 663ea33 commit 93e8e2b

File tree

6 files changed

+112
-1
lines changed

6 files changed

+112
-1
lines changed

HunyuanVideo

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 4fdf87f3d8dbdca223d7f4511e7d86d3293ffccd
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/bash
# Launch FastVideo's v1_preprocessing_new module under torchrun to preprocess
# a raw video dataset for the HunyuanVideo model with the t2v workload type.

# Number of processes torchrun starts on this node.
GPU_NUM=1 # 2,4,8
MODEL_PATH="hunyuanvideo-community/HunyuanVideo"
DATASET_PATH="/FastVideo/data/mini_i2v_dataset/crush-smol_raw"
OUTPUT_DIR="/FastVideo/data/mini_i2v_dataset/crush-smol_processed_t2v_hunyuan/"

# Every expansion is quoted so that a path containing spaces or glob
# characters is still passed to the module as a single argument.
torchrun --nproc_per_node="$GPU_NUM" \
    -m fastvideo.pipelines.preprocess.v1_preprocessing_new \
    --model_path "$MODEL_PATH" \
    --mode preprocess \
    --workload_type t2v \
    --preprocess.dataset_path "$DATASET_PATH" \
    --preprocess.dataset_output_dir "$OUTPUT_DIR" \
    --preprocess.preprocess_video_batch_size 2 \
    --preprocess.dataloader_num_workers 0 \
    --preprocess.max_height 480 \
    --preprocess.max_width 832 \
    --preprocess.num_frames 77 \
    --preprocess.train_fps 16 \
    --preprocess.samples_per_file 8 \
    --preprocess.flush_frequency 8 \
    --preprocess.video_length_tolerance_range 5

fastvideo/pipelines/preprocess/hunyuan/__init__.py

Whitespace-only changes.
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from fastvideo.fastvideo_args import FastVideoArgs
2+
from fastvideo.pipelines.composed_pipeline_base import ComposedPipelineBase
3+
from fastvideo.pipelines.preprocess.preprocess_stages import (
4+
TextTransformStage, VideoTransformStage)
5+
from fastvideo.pipelines.stages import (EncodingStage, ImageEncodingStage,
6+
TextEncodingStage)
7+
from fastvideo.pipelines.stages.image_encoding import ImageVAEEncodingStage
8+
9+
10+
class PreprocessPipelineI2V(ComposedPipelineBase):
    """I2V preprocessing pipeline.

    Registers, in order: text transform, prompt encoding, video transform,
    image encoding (only when both image modules are available), image VAE
    encoding and video VAE encoding.
    """

    _required_config_modules = [
        "image_encoder",
        "image_processor",
        "text_encoder",
        "tokenizer",
        "vae",
    ]

    def create_pipeline_stages(self, fastvideo_args: FastVideoArgs):
        """Build and register the preprocessing stages.

        Args:
            fastvideo_args: Run arguments; ``preprocess_config`` must be set.
        """
        assert fastvideo_args.preprocess_config is not None
        preprocess_cfg = fastvideo_args.preprocess_config

        # Caption handling: transform first, then encode the prompt.
        text_transform = TextTransformStage(
            cfg_uncondition_drop_rate=preprocess_cfg.training_cfg_rate,
            seed=preprocess_cfg.seed,
        )
        self.add_stage(stage_name="text_transform_stage",
                       stage=text_transform)

        prompt_encoding = TextEncodingStage(
            text_encoders=[self.get_module("text_encoder")],
            tokenizers=[self.get_module("tokenizer")],
        )
        self.add_stage(stage_name="prompt_encoding_stage",
                       stage=prompt_encoding)

        # Video transform driven by the preprocess configuration.
        video_transform = VideoTransformStage(
            train_fps=preprocess_cfg.train_fps,
            num_frames=preprocess_cfg.num_frames,
            max_height=preprocess_cfg.max_height,
            max_width=preprocess_cfg.max_width,
            do_temporal_sample=preprocess_cfg.do_temporal_sample,
        )
        self.add_stage(stage_name="video_transform_stage",
                       stage=video_transform)

        # The image-encoder stage is added only when both modules resolved.
        has_image_modules = (
            self.get_module("image_encoder") is not None
            and self.get_module("image_processor") is not None)
        if has_image_modules:
            image_encoding = ImageEncodingStage(
                image_encoder=self.get_module("image_encoder"),
                image_processor=self.get_module("image_processor"),
            )
            self.add_stage(stage_name="image_encoding_stage",
                           stage=image_encoding)

        image_vae_encoding = ImageVAEEncodingStage(
            vae=self.get_module("vae"))
        self.add_stage(stage_name="image_vae_encoding_stage",
                       stage=image_vae_encoding)

        video_encoding = EncodingStage(vae=self.get_module("vae"))
        self.add_stage(stage_name="video_encoding_stage",
                       stage=video_encoding)
51+
52+
53+
class PreprocessPipelineT2V(ComposedPipelineBase):
    """T2V preprocessing pipeline.

    Registers, in order: text transform, prompt encoding, video transform
    and video VAE encoding.
    """

    _required_config_modules = ["text_encoder", "tokenizer", "vae"]

    def create_pipeline_stages(self, fastvideo_args: FastVideoArgs):
        """Build and register the preprocessing stages.

        Args:
            fastvideo_args: Run arguments; ``preprocess_config`` must be set.
        """
        assert fastvideo_args.preprocess_config is not None
        preprocess_cfg = fastvideo_args.preprocess_config

        # Caption handling: transform first, then encode the prompt.
        text_transform = TextTransformStage(
            cfg_uncondition_drop_rate=preprocess_cfg.training_cfg_rate,
            seed=preprocess_cfg.seed,
        )
        self.add_stage(stage_name="text_transform_stage",
                       stage=text_transform)

        prompt_encoding = TextEncodingStage(
            text_encoders=[self.get_module("text_encoder")],
            tokenizers=[self.get_module("tokenizer")],
        )
        self.add_stage(stage_name="prompt_encoding_stage",
                       stage=prompt_encoding)

        # Video transform driven by the preprocess configuration.
        video_transform = VideoTransformStage(
            train_fps=preprocess_cfg.train_fps,
            num_frames=preprocess_cfg.num_frames,
            max_height=preprocess_cfg.max_height,
            max_width=preprocess_cfg.max_width,
            do_temporal_sample=preprocess_cfg.do_temporal_sample,
        )
        self.add_stage(stage_name="video_transform_stage",
                       stage=video_transform)

        video_encoding = EncodingStage(vae=self.get_module("vae"))
        self.add_stage(stage_name="video_encoding_stage",
                       stage=video_encoding)
81+
82+
83+
# Pipeline classes exposed by this module.
# NOTE(review): presumably read by a pipeline registry/loader for discovery;
# the consumer is not visible from this file — confirm.
EntryClass = [PreprocessPipelineI2V, PreprocessPipelineT2V]

fastvideo/workflow/preprocess/components.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,10 @@ def __call__(self, batch: dict[str, Any]) -> bool:
7979

8080
def _validate_data_type(self, batch: dict[str, Any]) -> bool:
8181
"""Validate basic validity of data items"""
82+
print("-------------------------------")
83+
print(batch)
8284
return not (batch["caption"] is None or batch["caption"] == ""
83-
or batch["fps"] is None or batch["fps"] <= 0
85+
or "fps" not in batch or batch["fps"] is None or batch["fps"] <= 0
8486
or batch["num_frames"] is None or batch["num_frames"] <= 0)
8587

8688
def _validate_resolution(self, batch: dict[str, Any]) -> bool:

fastvideo/workflow/preprocess/preprocess_workflow.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ def register_components(self) -> None:
4747
training_dataset = build_dataset(preprocess_config,
4848
split="train",
4949
validator=raw_data_validator)
50+
# set load_from_cache_file to False to check filter stats
51+
training_dataset = training_dataset.filter(raw_data_validator)
5052
# we do not use collate_fn here because we use iterable-style Dataset
5153
# and want to keep the original type of the dataset
5254
training_dataloader = DataLoader(

0 commit comments

Comments
 (0)