@@ -559,13 +559,42 @@ def _process_chunks(
559559 self .result .add_report (chunk .as_report (extraction_reports = []))
560560 return
561561
562+ is_whole_file_chunk = len (outer_chunks ) + len (unknown_chunks ) == 1
563+ if is_whole_file_chunk :
564+ # skip carving and extract the whole file (chunk) directly
565+ carved_path = self .task .path
566+ for chunk in outer_chunks :
567+ self ._extract_chunk (
568+ carved_path ,
569+ chunk ,
570+ self .config .get_extract_dir_for (carved_path ),
571+ # since we do not carve, we want to keep the input around
572+ remove_extracted_input = False ,
573+ )
574+ else :
575+ self ._carve_then_extract_chunks (file , outer_chunks , unknown_chunks )
576+
577+ def _carve_then_extract_chunks (self , file , outer_chunks , unknown_chunks ):
578+ carve_dir = self .config .get_carve_dir_for (self .task .path )
579+
562580 for chunk in unknown_chunks :
563- carved_unknown_path = carve_unknown_chunk (self . carve_dir , file , chunk )
581+ carved_unknown_path = carve_unknown_chunk (carve_dir , file , chunk )
564582 randomness = self ._calculate_randomness (carved_unknown_path )
565583 self .result .add_report (chunk .as_report (randomness = randomness ))
566584
567585 for chunk in outer_chunks :
568- self ._extract_chunk (file , chunk )
586+ carved_path = carve_valid_chunk (carve_dir , file , chunk )
587+
588+ self ._extract_chunk (
589+ carved_path ,
590+ chunk ,
591+ self .config .get_extract_dir_for (carved_path ),
592+ # when a carved chunk is successfully extracted, usually
593+ # we want to get rid of it, as its data is available in
594+ # extracted format, and the raw data is still part of
595+ # the file the chunk belongs to
596+ remove_extracted_input = not self .config .keep_extracted_chunks ,
597+ )
569598
570599 def _calculate_randomness (self , path : Path ) -> Optional [RandomnessReport ]:
571600 if self .task .depth < self .config .randomness_depth :
@@ -581,17 +610,14 @@ def _calculate_randomness(self, path: Path) -> Optional[RandomnessReport]:
581610 return report
582611 return None
583612
584- def _extract_chunk (self , file , chunk : ValidChunk ): # noqa: C901
585- skip_carving = chunk .is_whole_file
586- if skip_carving :
587- inpath = self .task .path
588- extract_dir = self .carve_dir
589- carved_path = None
590- else :
591- inpath = carve_valid_chunk (self .carve_dir , file , chunk )
592- extract_dir = self .carve_dir / (inpath .name + self .config .carve_suffix )
593- carved_path = inpath
594-
613+ def _extract_chunk (
614+ self ,
615+ carved_path : Path ,
616+ chunk : ValidChunk ,
617+ extract_dir : Path ,
618+ * ,
619+ remove_extracted_input : bool ,
620+ ):
595621 if extract_dir .exists ():
596622 # The extraction directory is not supposed to exist: it would mix up original and extracted files,
597623 # and it would just introduce weird, non-deterministic problems due to interference on paths
@@ -613,10 +639,10 @@ def _extract_chunk(self, file, chunk: ValidChunk): # noqa: C901
613639
614640 extraction_reports = []
615641 try :
616- if result := chunk .extract (inpath , extract_dir ):
642+ if result := chunk .extract (carved_path , extract_dir ):
617643 extraction_reports .extend (result .reports )
618644
619- if carved_path and not self . config . keep_extracted_chunks :
645+ if remove_extracted_input :
620646 logger .debug ("Removing extracted chunk" , path = carved_path )
621647 carved_path .unlink ()
622648
0 commit comments