|
35 | 35 | from .report import ( |
36 | 36 | CalculateMultiFileExceptionReport, |
37 | 37 | CarveDirectoryReport, |
38 | | - ExtractDirectoryExistsReport, |
39 | 38 | FileMagicReport, |
40 | 39 | HashReport, |
41 | 40 | MultiFileCollisionReport, |
| 41 | + OutputDirectoryExistsReport, |
42 | 42 | RandomnessMeasurements, |
43 | 43 | RandomnessReport, |
44 | 44 | Report, |
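
The rename reflects that a single report class now covers both conflict sites touched by this diff: pre-existing extraction directories and pre-existing carve directories. A minimal sketch of what the renamed class might look like, assuming attrs-style report definitions (the simplified `Report` base here is an assumption, not taken from this diff):

```python
from pathlib import Path

import attr


@attr.define(kw_only=True)
class Report:
    """Simplified stand-in for unblob's report base class (assumed shape)."""


@attr.define(kw_only=True)
class OutputDirectoryExistsReport(Report):
    # One class for both conflict cases in this diff: an existing
    # extraction directory and an existing carve directory.
    path: Path
```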
@@ -426,7 +426,7 @@ def _extract_multi_file(self, multi_file: MultiFile) -> Path: |
426 | 426 | raise DirectoryProcessingError( |
427 | 427 | "Skipped: extraction directory exists", |
428 | 428 | report=multi_file.as_report( |
429 | | - [ExtractDirectoryExistsReport(path=extract_dir)] |
| 429 | + [OutputDirectoryExistsReport(path=extract_dir)] |
430 | 430 | ), |
431 | 431 | ) |
432 | 432 |
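
For multi-file extraction the conflict surfaces as an exception that carries the report, rather than as a logged skip. A hedged sketch of how a caller might consume it, assuming the `report` keyword is stored on the exception instance (only the constructor call is confirmed by the diff; `extract_or_report` and the stand-in exception class are illustrative):

```python
# Stand-in with the same constructor shape as the call in the diff.
class DirectoryProcessingError(Exception):
    def __init__(self, message, report):
        super().__init__(message)
        self.report = report


def extract_or_report(extract_fn, multi_file, result):
    # extract_fn plays the role of _extract_multi_file; names are hypothetical.
    try:
        return extract_fn(multi_file)
    except DirectoryProcessingError as e:
        result.add_report(e.report)  # surface the conflict, keep processing
        return None
```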
|
@@ -507,24 +507,9 @@ def __init__( |
507 | 507 | self.size = size |
508 | 508 | self.result = result |
509 | 509 |
|
510 | | - self.carve_dir = config.get_extract_dir_for(self.task.path) |
511 | | - |
512 | 510 | def process(self): |
513 | 511 | logger.debug("Processing file", path=self.task.path, size=self.size) |
514 | 512 |
|
515 | | - if self.carve_dir.exists() and not self.config.skip_extraction: |
516 | | - # Extraction directory is not supposed to exist, it is usually a simple mistake of running |
517 | | - # unblob again without cleaning up or using --force. |
518 | | - # It would cause problems continuing, as it would mix up original and extracted files, |
519 | | - # and it would just introduce weird, non-deterministic problems due to interference on paths |
520 | | - # by multiple workers (parallel processing, modifying content (fix_symlink), |
521 | | - # and `mmap` + open for write with O_TRUNC). |
522 | | - logger.error( |
523 | | - "Skipped: extraction directory exists", extract_dir=self.carve_dir |
524 | | - ) |
525 | | - self.result.add_report(ExtractDirectoryExistsReport(path=self.carve_dir)) |
526 | | - return |
527 | | - |
528 | 513 | with File.from_path(self.task.path) as file: |
529 | 514 | all_chunks = search_chunks( |
530 | 515 | file, self.size, self.config.handlers, self.result |
@@ -576,11 +561,24 @@ def _process_chunks( |
576 | 561 | self._carve_then_extract_chunks(file, outer_chunks, unknown_chunks) |
577 | 562 |
|
578 | 563 | def _carve_then_extract_chunks(self, file, outer_chunks, unknown_chunks): |
| 564 | + assert not self.config.skip_extraction |
| 565 | + |
579 | 566 | carve_dir = self.config.get_carve_dir_for(self.task.path) |
580 | 567 |
|
581 | 568 | # report the technical carve directory explicitly |
582 | 569 | self.result.add_report(CarveDirectoryReport(carve_dir=carve_dir)) |
583 | 570 |
|
| 571 | + if carve_dir.exists(): |
| 572 | + # The carve directory is not supposed to exist; its presence usually means |
| 573 | + # unblob was run again without cleaning up or without --force. |
| 574 | + # Continuing would mix original and extracted files and introduce weird, |
| 575 | + # non-deterministic failures from multiple workers interfering on the same |
| 576 | + # paths (parallel processing, content modification in fix_symlink, |
| 577 | + # and `mmap` + open for write with O_TRUNC). |
| 578 | + logger.error("Skipped: carve directory exists", carve_dir=carve_dir) |
| 579 | + self.result.add_report(OutputDirectoryExistsReport(path=carve_dir)) |
| 580 | + return |
| 581 | + |
584 | 582 | for chunk in unknown_chunks: |
585 | 583 | carved_unknown_path = carve_unknown_chunk(carve_dir, file, chunk) |
586 | 584 | randomness = self._calculate_randomness(carved_unknown_path) |
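
The relocated check runs only when extraction is enabled (hence the assert) and only after the `CarveDirectoryReport` is recorded, so results still name the intended directory even when work is skipped. A standalone sketch of the guard itself, assuming structlog-style logging as seen in the diff (the helper name is hypothetical):

```python
from pathlib import Path

import structlog

logger = structlog.get_logger()


def ensure_fresh_carve_dir(carve_dir: Path) -> bool:
    """Return False if carve_dir already exists and must not be reused.

    Reusing it would mix original and extracted files and invite races
    between parallel workers (symlink fixups, mmap + O_TRUNC writes).
    """
    if carve_dir.exists():
        logger.error("Skipped: carve directory exists", carve_dir=carve_dir)
        return False
    return True
```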
@@ -633,7 +631,7 @@ def _extract_chunk( |
633 | 631 | chunk=chunk, |
634 | 632 | ) |
635 | 633 | self.result.add_report( |
636 | | - chunk.as_report([ExtractDirectoryExistsReport(path=extract_dir)]) |
| 634 | + chunk.as_report([OutputDirectoryExistsReport(path=extract_dir)]) |
637 | 635 | ) |
638 | 636 | return |
639 | 637 |
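
Anything downstream that matched on the old class name needs the same rename. A hedged example of a consumer-side filter (the flat `reports` iterable is an assumption about how results are consumed, not something shown in this diff):

```python
# Hypothetical helper: collects directory-conflict reports after a run.
def directory_conflicts(reports):
    return [r for r in reports if isinstance(r, OutputDirectoryExistsReport)]
```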