Skip to content

Commit 0b831c9

Browse files
committed
processing: introduce ProcessResult
This object holds an association of `Task`s and their respective `TaskResult`s (and, of course, their `Report`s). With this change we are able to reconstruct which report belongs to which task, and also which subtasks originate from which task, after processing has finished. This is a useful basis for metadata support, as any additional information can later be added as reports.
1 parent 517bb77 commit 0b831c9

File tree

6 files changed

+52
-35
lines changed

6 files changed

+52
-35
lines changed

tests/test_cleanup.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010

1111
from unblob.models import File, Handler, Regex, ValidChunk
1212
from unblob.processing import ExtractionConfig, process_files
13-
from unblob.report import ErrorReport
14-
from unblob.testing import check_reports
13+
from unblob.testing import check_result
1514

1615
_ZIP_CONTENT = b"good file"
1716
# replacing _ZIP_CONTENT with _DAMAGED_ZIP_CONTENT will result in CRC error at unpacking time
@@ -57,7 +56,7 @@ def test_remove_extracted_chunks(input_dir: Path, output_dir: Path):
5756

5857
all_reports = process_files(config, input_dir)
5958
assert list(output_dir.glob("**/*.zip")) == []
60-
check_reports(all_reports)
59+
check_result(all_reports)
6160

6261

6362
def test_keep_all_problematic_chunks(input_dir: Path, output_dir: Path):
@@ -69,9 +68,7 @@ def test_keep_all_problematic_chunks(input_dir: Path, output_dir: Path):
6968

7069
all_reports = process_files(config, input_dir)
7170
# damaged zip file should not be removed
72-
assert [
73-
r for r in all_reports if isinstance(r, ErrorReport)
74-
] != [], "Unexpectedly no errors found!"
71+
assert all_reports.errors != [], "Unexpectedly no errors found!"
7572
assert list(output_dir.glob("**/*.zip"))
7673

7774

@@ -84,7 +81,7 @@ def test_keep_all_unknown_chunks(input_dir: Path, output_dir: Path):
8481

8582
all_reports = process_files(config, input_dir)
8683
assert list(output_dir.glob("**/*.unknown"))
87-
check_reports(all_reports)
84+
check_result(all_reports)
8885

8986

9087
class _HandlerWithNullExtractor(Handler):
@@ -105,4 +102,4 @@ def test_keep_chunks_with_null_extractor(input_dir: Path, output_dir: Path):
105102
)
106103
all_reports = process_files(config, input_dir)
107104
assert list(output_dir.glob("**/*.null"))
108-
check_reports(all_reports)
105+
check_result(all_reports)

tests/test_handlers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from unblob.processing import ExtractionConfig, process_files
1919
from unblob.testing import (
2020
check_output_is_the_same,
21-
check_reports,
21+
check_result,
2222
gather_integration_tests,
2323
)
2424

@@ -35,7 +35,7 @@ def test_all_handlers(
3535
all_reports = process_files(extraction_config, input_dir)
3636

3737
check_output_is_the_same(output_dir, extraction_config.extract_root)
38-
check_reports(all_reports)
38+
check_result(all_reports)
3939

4040

4141
@pytest.mark.parametrize(

unblob/cli.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
#!/usr/bin/env python3
22
import sys
33
from pathlib import Path
4-
from typing import Iterable, List, Optional
4+
from typing import Iterable, Optional
55

66
import click
77
from structlog import get_logger
88

9+
from unblob.models import ProcessResult
910
from unblob.plugins import UnblobPluginManager
10-
from unblob.report import Report, Severity
11+
from unblob.report import Severity
1112

1213
from .cli_options import verbosity_option
1314
from .dependencies import get_dependencies, pretty_format_dependencies
1415
from .handlers import BUILTIN_HANDLERS, Handlers
15-
from .logging import configure_logger, noformat
16+
from .logging import configure_logger
1617
from .processing import (
1718
DEFAULT_DEPTH,
1819
DEFAULT_PROCESS_NUM,
@@ -171,7 +172,7 @@ def cli(
171172
plugins_path: Optional[Path],
172173
handlers: Handlers,
173174
plugin_manager: UnblobPluginManager,
174-
) -> List[Report]:
175+
) -> ProcessResult:
175176
configure_logger(verbose, extract_root)
176177

177178
plugin_manager.import_plugins(plugins_path)
@@ -198,14 +199,12 @@ def cli(
198199
cli.context_class = UnblobContext
199200

200201

201-
def get_exit_code_from_reports(reports: List[Report]) -> int:
202+
def get_exit_code_from_reports(reports: ProcessResult) -> int:
202203
severity_to_exit_code = [
203204
(Severity.ERROR, 1),
204205
(Severity.WARNING, 0),
205206
]
206-
severities = {
207-
report.severity for report in reports if isinstance(report, ErrorReport)
208-
}
207+
severities = {error.severity for error in reports.errors}
209208

210209
for severity, exit_code in severity_to_exit_code:
211210
if severity in severities:

unblob/models.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import abc
2+
import itertools
23
from pathlib import Path
34
from typing import List, Optional, Tuple, Type
45

@@ -7,7 +8,7 @@
78

89
from .file_utils import Endian, File, InvalidInputFormat, StructParser
910
from .parser import hexstring2regex
10-
from .report import Report
11+
from .report import ErrorReport, Report
1112

1213
logger = get_logger()
1314

@@ -17,7 +18,7 @@
1718
#
1819

1920

20-
@attr.define
21+
@attr.define(frozen=True)
2122
class Task:
2223
path: Path
2324
depth: int
@@ -113,6 +114,21 @@ def add_subtask(self, task: Task):
113114
self.subtasks.append(task)
114115

115116

117+
@attr.define
118+
class ProcessResult:
119+
results: List[TaskResult] = attr.field(factory=list)
120+
121+
@property
122+
def errors(self) -> List[ErrorReport]:
123+
reports = itertools.chain.from_iterable(
124+
r.reports for r in self.results
125+
)
126+
return [r for r in reports if isinstance(r, ErrorReport)]
127+
128+
def register(self, result: TaskResult):
129+
self.results.append(result)
130+
131+
116132
class ExtractError(Exception):
117133
"""There was an error during extraction"""
118134

unblob/processing.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,19 @@
1717
from .iter_utils import pairwise
1818
from .logging import noformat
1919
from .math import shannon_entropy
20-
from .models import ExtractError, File, Task, TaskResult, UnknownChunk, ValidChunk
20+
from .models import (
21+
ExtractError,
22+
File,
23+
ProcessResult,
24+
Task,
25+
TaskResult,
26+
UnknownChunk,
27+
ValidChunk,
28+
)
2129
from .pool import make_pool
2230
from .report import (
2331
ExtractDirectoryExistsReport,
2432
FileMagicReport,
25-
Report,
2633
StatReport,
2734
UnknownError,
2835
)
@@ -60,15 +67,15 @@ class ExtractionConfig:
6067

6168

6269
@terminate_gracefully
63-
def process_files(config: ExtractionConfig, path: Path) -> List[Report]:
70+
def process_files(config: ExtractionConfig, path: Path) -> ProcessResult:
6471
task = Task(
6572
path=path,
6673
depth=0,
6774
)
6875

6976
errors = check_extract_directory(task, config)
7077
if errors:
71-
return errors
78+
return ProcessResult(errors)
7279

7380
result = _process_one_file(config, task)
7481

@@ -85,7 +92,7 @@ def check_extract_directory(task: Task, config: ExtractionConfig):
8592
else:
8693
report = ExtractDirectoryExistsReport(path=extract_dir)
8794
logger.error("Extraction directory already exist", **report.asdict())
88-
errors.append(report)
95+
errors.append(TaskResult(task, [report]))
8996

9097
return errors
9198

@@ -106,14 +113,14 @@ def get_existing_extract_dirs(
106113
return extract_dirs
107114

108115

109-
def _process_one_file(config: ExtractionConfig, root_task: Task) -> List[Report]:
116+
def _process_one_file(config: ExtractionConfig, root_task: Task) -> ProcessResult:
110117
processor = Processor(config)
111-
all_reports = []
118+
aggregated_result = ProcessResult()
112119

113120
def process_result(pool, result):
114121
for new_task in result.subtasks:
115122
pool.submit(new_task)
116-
all_reports.extend(result.reports)
123+
aggregated_result.register(result)
117124

118125
pool = make_pool(
119126
process_num=config.process_num,
@@ -124,7 +131,8 @@ def process_result(pool, result):
124131
with pool:
125132
pool.submit(root_task)
126133
pool.process_until_done()
127-
return all_reports
134+
135+
return aggregated_result
128136

129137

130138
class Processor:

unblob/testing.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
import shlex
22
import subprocess
33
from pathlib import Path
4-
from typing import List
54

65
import pytest
76
from pytest_cov.embed import cleanup_on_sigterm
87

98
from unblob.finder import build_hyperscan_database
109
from unblob.logging import configure_logger
10+
from unblob.models import ProcessResult
1111
from unblob.processing import ExtractionConfig
12-
from unblob.report import ErrorReport, Report
1312

1413

1514
@pytest.fixture(scope="session", autouse=True)
@@ -79,9 +78,7 @@ def check_output_is_the_same(reference_dir: Path, extract_dir: Path):
7978
pytest.fail(f"\nDiff command: {runnable_diff_command}\n{exc.stdout}\n")
8079

8180

82-
def check_reports(reports: List[Report]):
81+
def check_result(reports: ProcessResult):
8382
__tracebackhide__ = True
8483

85-
assert [
86-
r for r in reports if isinstance(r, ErrorReport)
87-
] == [], "Unexpected error reports"
84+
assert reports.errors == [], "Unexpected error reports"

0 commit comments

Comments
 (0)