Skip to content

Commit c828597

Browse files
authored
Merge pull request #627 from onekey-sec/626-handle-dirhandler-exception
fix(processing): handle when DirectoryHandler.calculate_multifile throws exception
2 parents 5f2bd28 + 84ba230 commit c828597

File tree

3 files changed

+73
-2
lines changed

3 files changed

+73
-2
lines changed

tests/test_processing.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,16 @@ def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
507507
)
508508

509509

510+
class ExceptionDirHandler(SplitDirHandler):
511+
NAME = "exception-handler"
512+
PATTERN = Glob("*.part0")
513+
EXTRACTOR = None
514+
515+
def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
516+
del file
517+
raise ValueError("Something bad happened")
518+
519+
510520
class DummyTestHandler(Handler):
511521
NAME = "dummy"
512522
PATTERNS = [Regex("AA")]
@@ -775,3 +785,27 @@ def test_multi_file_extraction_failed(
775785
for report in multi_file_reports
776786
for extraction_report in report.extraction_reports
777787
)
788+
789+
790+
def test_multi_file_calculate_exception(
791+
multi_volume_zip: Path,
792+
multi_file_extraction_config: ExtractionConfig,
793+
extraction_root: Path,
794+
):
795+
multi_file_extraction_config.dir_handlers = (ExceptionDirHandler,)
796+
multi_file_extraction_config.handlers = (handlers.archive.zip.ZIPHandler,)
797+
798+
process_result = process_file(multi_file_extraction_config, multi_volume_zip)
799+
800+
task_result_by_path = {r.task.path: r for r in process_result.results}
801+
802+
directory = extraction_root / "input_extract"
803+
804+
multi_file_reports = task_result_by_path[directory].filter_reports(MultiFileReport)
805+
assert not multi_file_reports
806+
assert (directory / "test.part0") in task_result_by_path
807+
assert (directory / "test.part1") in task_result_by_path
808+
assert (directory / "test.part2") in task_result_by_path
809+
assert (directory / "test2.part0") in task_result_by_path
810+
assert (directory / "test2.part1") in task_result_by_path
811+
assert (directory / "other") in task_result_by_path

unblob/processing.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
from unblob.handlers import BUILTIN_DIR_HANDLERS, BUILTIN_HANDLERS, Handlers
1414

1515
from .extractor import carve_unknown_chunk, carve_valid_chunk, fix_extracted_directory
16-
from .file_utils import iterate_file
16+
from .file_utils import InvalidInputFormat, iterate_file
1717
from .finder import search_chunks
1818
from .iter_utils import pairwise
1919
from .logging import noformat
2020
from .models import (
2121
Chunk,
22+
DirectoryHandler,
2223
DirectoryHandlers,
2324
ExtractError,
2425
File,
@@ -31,6 +32,7 @@
3132
)
3233
from .pool import make_pool
3334
from .report import (
35+
CalculateMultiFileExceptionReport,
3436
EntropyReport,
3537
ExtractDirectoryExistsReport,
3638
FileMagicReport,
@@ -324,7 +326,7 @@ def _process_directory(self) -> Tuple[Set[Path], Set[Path]]:
324326
dir_handler = dir_handler_class()
325327

326328
for path in dir_handler.PATTERN.get_files(self.dir_task.path):
327-
multi_file = dir_handler.calculate_multifile(path)
329+
multi_file = self._calculate_multifile(dir_handler, path, self.result)
328330

329331
if multi_file is None:
330332
continue
@@ -349,6 +351,32 @@ def _process_directory(self) -> Tuple[Set[Path], Set[Path]]:
349351
processed_paths.update(multi_file.paths)
350352
return processed_paths, extract_dirs
351353

354+
@staticmethod
355+
def _calculate_multifile(
356+
dir_handler: DirectoryHandler, path: Path, task_result: TaskResult
357+
) -> Optional[MultiFile]:
358+
try:
359+
return dir_handler.calculate_multifile(path)
360+
except InvalidInputFormat as exc:
361+
logger.debug(
362+
"Invalid MultiFile format",
363+
exc_info=exc,
364+
handler=dir_handler.NAME,
365+
path=path,
366+
_verbosity=2,
367+
)
368+
except Exception as exc:
369+
error_report = CalculateMultiFileExceptionReport(
370+
handler=dir_handler.NAME,
371+
exception=exc,
372+
path=path,
373+
)
374+
task_result.add_report(error_report)
375+
logger.warning(
376+
"Unhandled Exception during multi file calculation",
377+
**error_report.asdict(),
378+
)
379+
352380
def _check_conflicting_files(
353381
self, multi_file: MultiFile, processed_paths: Set[Path]
354382
):

unblob/report.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,15 @@ class CalculateChunkExceptionReport(UnknownError):
7878
handler: str
7979

8080

81+
@attr.define(kw_only=True, frozen=True)
82+
class CalculateMultiFileExceptionReport(UnknownError):
83+
"""Describes an exception raised during calculate_multifile execution."""
84+
85+
path: Path
86+
# Stored in `str` rather than `Handler`, because the pickle picks up structs from `C_DEFINITIONS`
87+
handler: str
88+
89+
8190
@attr.define(kw_only=True, frozen=True)
8291
class ExtractCommandFailedReport(ErrorReport):
8392
"""Describes an error when failed to run the extraction command."""

0 commit comments

Comments
 (0)