Skip to content

Commit 83b7093

Browse files
committed
Passed pre-commit test
1 parent 1ea1c82 commit 83b7093

File tree

1 file changed

+27
-29
lines changed

1 file changed

+27
-29
lines changed

fastvideo/v1/pipelines/preprocess_pipeline.py

Lines changed: 27 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
135135
prompt_embeds=[],
136136
prompt_attention_mask=[],
137137
)
138+
assert hasattr(self, "prompt_encoding_stage")
138139
result_batch = self.prompt_encoding_stage(batch, fastvideo_args)
139140
prompt_embeds, prompt_attention_mask = result_batch.prompt_embeds[
140141
0], result_batch.prompt_attention_mask[0]
@@ -266,7 +267,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
266267
self.all_tables = []
267268
self.all_tables.append(table)
268269

269-
logger.info(f"Collected batch with {len(table)} samples")
270+
logger.info("Collected batch with %s samples", len(table))
270271

271272
if num_processed_samples >= args.flush_frequency:
272273
assert hasattr(self, 'all_tables') and self.all_tables
@@ -295,7 +296,7 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
295296
print(
296297
f"Using {num_workers} workers to process {total_chunks} chunks"
297298
)
298-
logger.info(f"Chunks per worker: {chunks_per_worker}")
299+
logger.info("Chunks per worker: %s", chunks_per_worker)
299300

300301
# Prepare work ranges
301302
work_ranges = []
@@ -319,30 +320,28 @@ def preprocess_video_and_text(self, fastvideo_args: FastVideoArgs, args):
319320
try:
320321
written = future.result()
321322
total_written += written
322-
logger.info(
323-
f"Processed chunk with {written} samples")
323+
logger.info("Processed chunk with %s samples",
324+
written)
324325
except Exception as e:
325326
work_range = futures[future]
326327
failed_ranges.append(work_range)
327-
logger.error(
328-
f"Failed to process range {work_range[0]}-{work_range[1]}: {str(e)}"
329-
)
328+
logger.error("Failed to process range %s-%s: %s",
329+
work_range[0], work_range[1], str(e))
330330

331331
# Retry failed ranges sequentially
332332
if failed_ranges:
333-
logger.warning(
334-
f"Retrying {len(failed_ranges)} failed ranges sequentially"
335-
)
333+
logger.warning("Retrying %s failed ranges sequentially",
334+
len(failed_ranges))
336335
for work_range in failed_ranges:
337336
try:
338337
total_written += self.process_chunk_range(
339338
work_range)
340339
except Exception as e:
341340
logger.error(
342-
f"Failed to process range {work_range[0]}-{work_range[1]} after retry: {str(e)}"
343-
)
341+
"Failed to process range %s-%s after retry: %s",
342+
work_range[0], work_range[1], str(e))
344343

345-
logger.info(f"Total samples written: {total_written}")
344+
logger.info("Total samples written: %s", total_written)
346345

347346
num_processed_samples = 0
348347
self.all_tables = []
@@ -373,6 +372,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
373372
prompt_embeds=[],
374373
prompt_attention_mask=[],
375374
)
375+
assert hasattr(self, "prompt_encoding_stage")
376376
result_batch = self.prompt_encoding_stage(batch, fastvideo_args)
377377
prompt_embeds = result_batch.prompt_embeds[0]
378378
prompt_attention_mask = result_batch.prompt_attention_mask[0]
@@ -388,8 +388,8 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
388388

389389
# Log the shapes after removing padding
390390
logger.info(
391-
f"Shape after removing padding - Embeddings: {text_embedding.shape}, Mask: {text_attention_mask.shape}"
392-
)
391+
"Shape after removing padding - Embeddings: %s, Mask: %s",
392+
text_embedding.shape, text_attention_mask.shape)
393393

394394
# Create record for Parquet dataset
395395
record = {
@@ -414,7 +414,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
414414
}
415415
batch_data.append(record)
416416

417-
logger.info(f"Saved validation sample: {file_name}")
417+
logger.info("Saved validation sample: %s", file_name)
418418

419419
if batch_data:
420420
# Add progress bar for writing to Parquet dataset
@@ -467,7 +467,7 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
467467
write_pbar.update(1)
468468
write_pbar.close()
469469

470-
logger.info(f"Total validation samples: {len(table)}")
470+
logger.info("Total validation samples: %s", len(table))
471471

472472
work_range = (0, 1, table, 0, validation_parquet_dir, len(table))
473473

@@ -484,22 +484,21 @@ def preprocess_validation_text(self, fastvideo_args: FastVideoArgs, args):
484484
except Exception as e:
485485
work_range = futures[future]
486486
failed_ranges.append(work_range)
487-
logger.error(
488-
f"Failed to process range {work_range[0]}-{work_range[1]}: {str(e)}"
489-
)
487+
logger.error("Failed to process range %s-%s: %s",
488+
work_range[0], work_range[1], str(e))
490489

491490
if failed_ranges:
492-
logger.warning(
493-
f"Retrying {len(failed_ranges)} failed ranges sequentially")
491+
logger.warning("Retrying %s failed ranges sequentially",
492+
len(failed_ranges))
494493
for work_range in failed_ranges:
495494
try:
496495
total_written += self.process_chunk_range(work_range)
497496
except Exception as e:
498497
logger.error(
499-
f"Failed to process range {work_range[0]}-{work_range[1]} after retry: {str(e)}"
500-
)
498+
"Failed to process range %s-%s after retry: %s",
499+
work_range[0], work_range[1], str(e))
501500

502-
logger.info(f"Total validation samples written: {total_written}")
501+
logger.info("Total validation samples written: %s", total_written)
503502

504503
# Clear memory
505504
del table
@@ -552,10 +551,9 @@ def process_chunk_range(args: Any) -> int:
552551

553552
return total_written
554553
except Exception as e:
555-
logger.error(
556-
f"Error processing chunks {start_idx}-{end_idx} for worker {worker_id}: {str(e)}"
557-
)
554+
logger.error("Error processing chunks %s-%s for worker %s: %s",
555+
start_idx, end_idx, worker_id, str(e))
558556
raise
559557

560558

561-
EntryClass = PreprocessPipeline
559+
EntryClass = PreprocessPipeline

0 commit comments

Comments
 (0)