Skip to content

Commit 1601c99

Browse files
committed
Add ids to chunks and chunk_ids to tasks
1 parent b17742b commit 1601c99

File tree

6 files changed

+23
-4
lines changed

6 files changed

+23
-4
lines changed

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@
1111

1212
@pytest.fixture
1313
def task_result():
14-
task = Task(Path("/nonexistent"), 0)
14+
task = Task(path=Path("/nonexistent"), depth=0, chunk_id="")
1515
return TaskResult(task)

tests/test_finder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import attr
12
import pytest
23

34
from unblob.file_utils import InvalidInputFormat
@@ -155,4 +156,4 @@ def test_search_chunks(content, expected_chunks, task_result):
155156

156157
assert len(chunks) == len(expected_chunks)
157158
for expected_chunk, chunk in zip(expected_chunks, chunks):
158-
assert chunk == expected_chunk
159+
assert attr.evolve(chunk, id="") == attr.evolve(expected_chunk, id="")

tests/test_processing.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pathlib import Path
22
from typing import List
33

4+
import attr
45
import pytest
56

67
from unblob.models import UnknownChunk, ValidChunk
@@ -14,6 +15,14 @@
1415
)
1516

1617

18+
def assert_same_chunks(expected, actual, explanation=None):
19+
"""An assert, that ignores the chunk.id-s"""
20+
21+
assert len(expected) == len(actual), explanation
22+
for i, (e, a) in enumerate(zip(expected, actual)):
23+
assert attr.evolve(e, id="") == attr.evolve(a, id=""), explanation
24+
25+
1726
@pytest.mark.parametrize(
1827
"chunks, expected, explanation",
1928
[
@@ -74,7 +83,7 @@
7483
def test_remove_inner_chunks(
7584
chunks: List[ValidChunk], expected: List[ValidChunk], explanation: str
7685
):
77-
assert expected == remove_inner_chunks(chunks), explanation
86+
assert_same_chunks(expected, remove_inner_chunks(chunks), explanation)
7887

7988

8089
@pytest.mark.parametrize(
@@ -97,7 +106,7 @@ def test_remove_inner_chunks(
97106
def test_calculate_unknown_chunks(
98107
chunks: List[ValidChunk], file_size: int, expected: List[UnknownChunk]
99108
):
100-
assert expected == calculate_unknown_chunks(chunks, file_size)
109+
assert_same_chunks(expected, calculate_unknown_chunks(chunks, file_size))
101110

102111

103112
@pytest.mark.parametrize(

unblob/models.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from structlog import get_logger
1010

1111
from .file_utils import Endian, File, InvalidInputFormat, StructParser
12+
from .identifiers import new_id
1213
from .parser import hexstring2regex
1314
from .report import ChunkReport, ErrorReport, Report
1415

@@ -24,6 +25,7 @@
2425
class Task:
2526
path: Path
2627
depth: int
28+
chunk_id: str
2729

2830

2931
@attr.define
@@ -43,6 +45,8 @@ class Chunk:
4345
end_offset: int
4446
"""The index of the first byte after the end of the chunk"""
4547

48+
id: str = attr.field(factory=new_id)
49+
4650
def __attrs_post_init__(self):
4751
if self.start_offset < 0 or self.end_offset < 0:
4852
raise InvalidInputFormat(f"Chunk has negative offset: {self}")
@@ -92,6 +96,7 @@ def extract(self, inpath: Path, outdir: Path):
9296

9397
def as_report(self, extraction_reports: List[Report]) -> ChunkReport:
9498
return ChunkReport(
99+
id=self.id,
95100
start_offset=self.start_offset,
96101
end_offset=self.end_offset,
97102
size=self.size,

unblob/processing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def process_file(
8383
config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
8484
) -> ProcessResult:
8585
task = Task(
86+
chunk_id="",
8687
path=input_path,
8788
depth=0,
8889
)
@@ -233,6 +234,7 @@ def _process_task(self, result: TaskResult, task: Task):
233234
for path in task.path.iterdir():
234235
result.add_subtask(
235236
Task(
237+
chunk_id=task.chunk_id,
236238
path=path,
237239
depth=task.depth,
238240
)
@@ -359,6 +361,7 @@ def _extract_chunk(self, file, chunk: ValidChunk):
359361
if extract_dir.exists():
360362
self.result.add_subtask(
361363
Task(
364+
chunk_id=chunk.id,
362365
path=extract_dir,
363366
depth=self.task.depth + 1,
364367
)

unblob/report.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ def from_path(cls, path: Path):
153153

154154
@attr.define(kw_only=True)
155155
class ChunkReport(Report):
156+
id: str
156157
handler_name: str
157158
start_offset: int
158159
end_offset: int

0 commit comments

Comments
 (0)