@@ -135,6 +135,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
135135                    prompt_embeds = [],
136136                    prompt_attention_mask = [],
137137                )
138+                 assert  hasattr (self , "prompt_encoding_stage" )
138139                result_batch  =  self .prompt_encoding_stage (batch , fastvideo_args )
139140                prompt_embeds , prompt_attention_mask  =  result_batch .prompt_embeds [
140141                    0 ], result_batch .prompt_attention_mask [0 ]
@@ -266,7 +267,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
266267                    self .all_tables  =  []
267268                self .all_tables .append (table )
268269
269-                 logger .info (f "Collected batch with { len (table )}  samples" 
270+                 logger .info ("Collected batch with %s samples"  ,  len (table ))
270271
271272            if  num_processed_samples  >=  args .flush_frequency :
272273                assert  hasattr (self , 'all_tables' ) and  self .all_tables 
@@ -295,7 +296,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
295296                print (
296297                    f"Using { num_workers } { total_chunks }  
297298                )
298-                 logger .info (f "Chunks per worker: { chunks_per_worker } " 
299+                 logger .info ("Chunks per worker: %s"  ,  chunks_per_worker )
299300
300301                # Prepare work ranges 
301302                work_ranges  =  []
@@ -319,30 +320,28 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
319320                        try :
320321                            written  =  future .result ()
321322                            total_written  +=  written 
322-                             logger .info (
323-                                 f"Processed chunk with  { written }  samples" 
323+                             logger .info ("Processed chunk with %s samples" , 
324+                                          written )
324325                        except  Exception  as  e :
325326                            work_range  =  futures [future ]
326327                            failed_ranges .append (work_range )
327-                             logger .error (
328-                                 f"Failed to process range { work_range [0 ]} { work_range [1 ]} { str (e )}  
329-                             )
328+                             logger .error ("Failed to process range %s-%s: %s" ,
329+                                          work_range [0 ], work_range [1 ], str (e ))
330330
331331                # Retry failed ranges sequentially 
332332                if  failed_ranges :
333-                     logger .warning (
334-                         f"Retrying { len (failed_ranges )}  
335-                     )
333+                     logger .warning ("Retrying %s failed ranges sequentially" ,
334+                                    len (failed_ranges ))
336335                    for  work_range  in  failed_ranges :
337336                        try :
338337                            total_written  +=  self .process_chunk_range (
339338                                work_range )
340339                        except  Exception  as  e :
341340                            logger .error (
342-                                 f "Failed to process range { work_range [ 0 ] } - { work_range [ 1 ] } { str ( e ) } " 
343-                             )
341+                                 "Failed to process range %s-%s  after retry: %s"  , 
342+                                  work_range [ 0 ],  work_range [ 1 ],  str ( e ) )
344343
345-                 logger .info (f "Total samples written: { total_written } " 
344+                 logger .info ("Total samples written: %s"  ,  total_written )
346345
347346                num_processed_samples  =  0 
348347                self .all_tables  =  []
@@ -373,6 +372,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
373372                    prompt_embeds = [],
374373                    prompt_attention_mask = [],
375374                )
375+                 assert  hasattr (self , "prompt_encoding_stage" )
376376                result_batch  =  self .prompt_encoding_stage (batch , fastvideo_args )
377377            prompt_embeds  =  result_batch .prompt_embeds [0 ]
378378            prompt_attention_mask  =  result_batch .prompt_attention_mask [0 ]
@@ -388,8 +388,8 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
388388
389389            # Log the shapes after removing padding 
390390            logger .info (
391-                 f "Shape after removing padding - Embeddings: { text_embedding . shape } { text_attention_mask . shape } " 
392-             )
391+                 "Shape after removing padding - Embeddings: %s , Mask: %s"  , 
392+                  text_embedding . shape ,  text_attention_mask . shape )
393393
394394            # Create record for Parquet dataset 
395395            record  =  {
@@ -414,7 +414,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
414414            }
415415            batch_data .append (record )
416416
417-             logger .info (f "Saved validation sample: { file_name } " 
417+             logger .info ("Saved validation sample: %s"  ,  file_name )
418418
419419        if  batch_data :
420420            # Add progress bar for writing to Parquet dataset 
@@ -467,7 +467,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
467467            write_pbar .update (1 )
468468            write_pbar .close ()
469469
470-             logger .info (f "Total validation samples: { len (table )} " 
470+             logger .info ("Total validation samples: %s"  ,  len (table ))
471471
472472            work_range  =  (0 , 1 , table , 0 , validation_parquet_dir , len (table ))
473473
@@ -484,22 +484,21 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
484484                    except  Exception  as  e :
485485                        work_range  =  futures [future ]
486486                        failed_ranges .append (work_range )
487-                         logger .error (
488-                             f"Failed to process range { work_range [0 ]} { work_range [1 ]} { str (e )}  
489-                         )
487+                         logger .error ("Failed to process range %s-%s: %s" ,
488+                                      work_range [0 ], work_range [1 ], str (e ))
490489
491490            if  failed_ranges :
492-                 logger .warning (
493-                     f"Retrying  { len (failed_ranges )}  failed ranges sequentially" 
491+                 logger .warning ("Retrying %s failed ranges sequentially" , 
492+                                 len (failed_ranges ))
494493                for  work_range  in  failed_ranges :
495494                    try :
496495                        total_written  +=  self .process_chunk_range (work_range )
497496                    except  Exception  as  e :
498497                        logger .error (
499-                             f "Failed to process range { work_range [ 0 ] } - { work_range [ 1 ] } { str ( e ) } " 
500-                         )
498+                             "Failed to process range %s-%s  after retry: %s"  , 
499+                              work_range [ 0 ],  work_range [ 1 ],  str ( e ) )
501500
502-             logger .info (f "Total validation samples written: { total_written } " 
501+             logger .info ("Total validation samples written: %s"  ,  total_written )
503502
504503            # Clear memory 
505504            del  table 
@@ -552,10 +551,9 @@ def process_chunk_range(args: Any) -> int:
552551
553552            return  total_written 
554553        except  Exception  as  e :
555-             logger .error (
556-                 f"Error processing chunks { start_idx } { end_idx } { worker_id } { str (e )}  
557-             )
554+             logger .error ("Error processing chunks %s-%s for worker %s: %s" ,
555+                          start_idx , end_idx , worker_id , str (e ))
558556            raise 
559557
560558
561- EntryClass  =  PreprocessPipeline 
559+ EntryClass  =  PreprocessPipeline 
0 commit comments