Skip to content

Commit d92ebbc

Browse files
committed
bugfix
1 parent 2ed4d01 commit d92ebbc

File tree

2 files changed

+1
-35
lines changed

2 files changed

+1
-35
lines changed

fastvideo/workflow/preprocess/components.py

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414
from datasets import Dataset, Video, load_dataset
1515

1616
from fastvideo.configs.configs import DatasetType, PreprocessConfig
17-
<<<<<<< HEAD
18-
from fastvideo.distributed.parallel_state import get_world_rank, get_world_size
19-
=======
20-
>>>>>>> 15df36ab ([Feat][Preprocess] support merged dataset (#752))
2117
from fastvideo.logger import init_logger
2218
from fastvideo.pipelines.pipeline_batch_info import PreprocessBatch
2319

@@ -82,8 +78,6 @@ def __call__(self, batch: dict[str, Any]) -> bool:
8278

8379
def _validate_data_type(self, batch: dict[str, Any]) -> bool:
8480
"""Validate basic validity of data items"""
85-
print("-------------------------------")
86-
print(batch)
8781
return not (batch["caption"] is None or batch["caption"] == ""
8882
or "fps" not in batch or batch["fps"] is None or batch["fps"] <= 0
8983
or batch["num_frames"] is None or batch["num_frames"] <= 0)
@@ -405,19 +399,10 @@ def _default_file_writer_fn(self, args_tuple: tuple) -> int:
405399
return written_count
406400

407401

408-
<<<<<<< HEAD
409-
def build_dataset(preprocess_config: PreprocessConfig, split: str,
410-
validator: Callable[[dict[str, Any]], bool]) -> Dataset:
411-
if preprocess_config.dataset_type == DatasetType.HF:
412-
dataset = load_dataset(preprocess_config.dataset_path, split=split)
413-
dataset = dataset.filter(validator)
414-
dataset = dataset.shard(num_shards=get_world_size(),
415-
index=get_world_rank())
416-
=======
402+
417403
def build_dataset(preprocess_config: PreprocessConfig, split: str) -> Dataset:
418404
if preprocess_config.dataset_type == DatasetType.HF:
419405
dataset = load_dataset(preprocess_config.dataset_path, split=split)
420-
>>>>>>> 15df36ab ([Feat][Preprocess] support merged dataset (#752))
421406
elif preprocess_config.dataset_type == DatasetType.MERGED:
422407
metadata_json_path = os.path.join(preprocess_config.dataset_path,
423408
"videos2caption.json")
@@ -431,14 +416,6 @@ def build_dataset(preprocess_config: PreprocessConfig, split: str) -> Dataset:
431416
dataset = dataset.rename_column("cap", "caption")
432417
if "path" in column_names:
433418
dataset = dataset.rename_column("path", "name")
434-
<<<<<<< HEAD
435-
436-
dataset = dataset.filter(validator)
437-
dataset = dataset.shard(num_shards=get_world_size(),
438-
index=get_world_rank())
439-
440-
=======
441-
>>>>>>> 15df36ab ([Feat][Preprocess] support merged dataset (#752))
442419
# add video column
443420
def add_video_column(item: dict[str, Any]) -> dict[str, Any]:
444421
item["video"] = os.path.join(video_folder, item["name"])

fastvideo/workflow/preprocess/preprocess_workflow.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,7 @@ def register_components(self) -> None:
4444
self.add_component("raw_data_validator", raw_data_validator)
4545

4646
# training dataset
47-
<<<<<<< HEAD
48-
training_dataset = build_dataset(preprocess_config,
49-
split="train",
50-
validator=raw_data_validator)
51-
=======
5247
training_dataset = build_dataset(preprocess_config, split="train")
53-
>>>>>>> 15df36ab ([Feat][Preprocess] support merged dataset (#752))
5448
# set load_from_cache_file to False to check filter stats
5549
training_dataset = training_dataset.filter(raw_data_validator)
5650
# we do not use collate_fn here because we use iterable-style Dataset
@@ -66,13 +60,8 @@ def register_components(self) -> None:
6660
# try to load validation dataset if it exists
6761
try:
6862
validation_dataset = build_dataset(preprocess_config,
69-
<<<<<<< HEAD
70-
split="validation",
71-
validator=raw_data_validator)
72-
=======
7363
split="validation")
7464
validation_dataset = validation_dataset.filter(raw_data_validator)
75-
>>>>>>> 15df36ab ([Feat][Preprocess] support merged dataset (#752))
7665
validation_dataloader = DataLoader(
7766
validation_dataset,
7867
batch_size=preprocess_config.preprocess_video_batch_size,

0 commit comments

Comments (0)