99import multiprocessing
1010import os
1111from concurrent .futures import ProcessPoolExecutor
12+ from typing import Any , Dict
1213
1314import numpy as np
1415import pyarrow as pa
@@ -52,8 +53,8 @@ def forward(
5253 args ,
5354 ):
5455 # Initialize class variables for data sharing
55- self .video_data = {} # Store video metadata and paths
56- self .latent_data = {} # Store latent tensors
56+ self .video_data : Dict [ str , Any ] = {} # Store video metadata and paths
57+ self .latent_data : Dict [ str , Any ] = {} # Store latent tensors
5758 self .preprocess_validation_text (fastvideo_args , args )
5859 self .preprocess_video_and_text (fastvideo_args , args )
5960
@@ -352,7 +353,6 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
352353 "validation_parquet_dataset" )
353354 os .makedirs (validation_parquet_dir , exist_ok = True )
354355
355-
356356 with open (args .validation_prompt_txt , encoding = "utf-8" ) as file :
357357 lines = file .readlines ()
358358 prompts = [line .strip () for line in lines ]
@@ -506,7 +506,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
506506 gc .collect () # Force garbage collection
507507
508508 @staticmethod
509- def process_chunk_range (args ) :
509+ def process_chunk_range (args : Any ) -> int :
510510 start_idx , end_idx , table , worker_id , output_dir , samples_per_file = args
511511 try :
512512 total_written = 0
@@ -558,4 +558,4 @@ def process_chunk_range(args):
558558 raise
559559
560560
561- EntryClass = PreprocessPipeline
561+ EntryClass = PreprocessPipeline
0 commit comments