Skip to content

Commit 3c33785

Browse files
committed
Drop support for directory as input
The output directory structure is expected to reflect the input structure. Having a single input file and a single extract directory is simple. However, supporting directories as input is problematic: the input's subdirectories would have to appear in the output, but we also create directories for extraction, so it is very easy to craft an input whose output conflicts with them (an unblob output is usually one such input).
1 parent 1f6c0b2 commit 3c33785

File tree

6 files changed

+29
-25
lines changed

6 files changed

+29
-25
lines changed

.github/workflows/build-publish-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
run: docker run --rm ${{ env.DOCKER_IMAGE }} --show-external-dependencies
5353

5454
- name: Check unblob - run for a file with --verbose
55-
run: docker run --rm -v "$(pwd)"/tests/integration/archive/zip/regular:/test ${{ env.DOCKER_IMAGE }} -v -e /tmp /test
55+
run: docker run --rm -v "$(pwd)"/tests/integration/archive/zip/regular:/test ${{ env.DOCKER_IMAGE }} -v -e /tmp /test/__input__/apple.zip
5656

5757
- name: Build and push
5858
if: ${{ github.event_name == 'push' && github.ref_name == 'main' }}

tests/test_cleanup.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,8 @@ def wrapzip(filename: str, content: bytes) -> bytes:
3434

3535

3636
@pytest.fixture()
37-
def input_dir(tmp_path: Path):
38-
input_dir = tmp_path / "input"
39-
input_dir.mkdir()
40-
return input_dir
37+
def input_file(tmp_path: Path):
38+
return tmp_path / "input_file"
4139

4240

4341
@pytest.fixture()
@@ -47,39 +45,39 @@ def output_dir(tmp_path):
4745
return output_dir
4846

4947

50-
def test_remove_extracted_chunks(input_dir: Path, output_dir: Path):
51-
(input_dir / "blob").write_bytes(ZIP_BYTES)
48+
def test_remove_extracted_chunks(input_file: Path, output_dir: Path):
49+
input_file.write_bytes(ZIP_BYTES)
5250
config = ExtractionConfig(
5351
extract_root=output_dir,
5452
entropy_depth=0,
5553
)
5654

57-
all_reports = process_file(config, input_dir)
55+
all_reports = process_file(config, input_file)
5856
assert list(output_dir.glob("**/*.zip")) == []
5957
check_result(all_reports)
6058

6159

62-
def test_keep_all_problematic_chunks(input_dir: Path, output_dir: Path):
63-
(input_dir / "blob").write_bytes(DAMAGED_ZIP_BYTES)
60+
def test_keep_all_problematic_chunks(input_file: Path, output_dir: Path):
61+
input_file.write_bytes(DAMAGED_ZIP_BYTES)
6462
config = ExtractionConfig(
6563
extract_root=output_dir,
6664
entropy_depth=0,
6765
)
6866

69-
all_reports = process_file(config, input_dir)
67+
all_reports = process_file(config, input_file)
7068
# damaged zip file should not be removed
7169
assert all_reports.errors != [], "Unexpectedly no errors found!"
7270
assert list(output_dir.glob("**/*.zip"))
7371

7472

75-
def test_keep_all_unknown_chunks(input_dir: Path, output_dir: Path):
76-
(input_dir / "blob").write_bytes(b"unknown1" + ZIP_BYTES + b"unknown2")
73+
def test_keep_all_unknown_chunks(input_file: Path, output_dir: Path):
74+
input_file.write_bytes(b"unknown1" + ZIP_BYTES + b"unknown2")
7775
config = ExtractionConfig(
7876
extract_root=output_dir,
7977
entropy_depth=0,
8078
)
8179

82-
all_reports = process_file(config, input_dir)
80+
all_reports = process_file(config, input_file)
8381
assert list(output_dir.glob("**/*.unknown"))
8482
check_result(all_reports)
8583

@@ -93,13 +91,13 @@ def calculate_chunk(self, file: File, start_offset: int) -> ValidChunk:
9391
return ValidChunk(start_offset=start_offset, end_offset=start_offset + 1)
9492

9593

96-
def test_keep_chunks_with_null_extractor(input_dir: Path, output_dir: Path):
97-
(input_dir / "blob").write_text("some text")
94+
def test_keep_chunks_with_null_extractor(input_file: Path, output_dir: Path):
95+
input_file.write_bytes(b"some text")
9896
config = ExtractionConfig(
9997
extract_root=output_dir,
10098
entropy_depth=0,
10199
handlers=(_HandlerWithNullExtractor,),
102100
)
103-
all_reports = process_file(config, input_dir)
101+
all_reports = process_file(config, input_file)
104102
assert list(output_dir.glob("**/*.null"))
105103
check_result(all_reports)

tests/test_cli.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,10 @@ def test_non_existing_file(tmp_path: Path):
127127
result = runner.invoke(unblob.cli.cli, ["--extract-dir", str(tmp_path), str(path)])
128128
assert result.exit_code == 2
129129
assert "Invalid value for 'FILE'" in result.output
130-
assert f"Path '{str(path)}' does not exist" in result.output
130+
assert f"File '{str(path)}' does not exist" in result.output
131131

132132

133-
def test_empty_dir_as_file(tmp_path: Path):
133+
def test_dir_for_file(tmp_path: Path):
134134
runner = CliRunner()
135135
out_path = tmp_path.joinpath("out")
136136
out_path.mkdir()
@@ -139,7 +139,7 @@ def test_empty_dir_as_file(tmp_path: Path):
139139
result = runner.invoke(
140140
unblob.cli.cli, ["--extract-dir", str(out_path), str(in_path)]
141141
)
142-
assert result.exit_code == 0
142+
assert result.exit_code != 0
143143

144144

145145
@pytest.mark.parametrize(
@@ -172,7 +172,8 @@ def test_archive_success(
172172
/ "archive"
173173
/ "zip"
174174
/ "regular"
175-
/ "__input__/"
175+
/ "__input__"
176+
/ "apple.zip"
176177
)
177178
process_file_mock = mock.MagicMock()
178179
logger_config_mock = mock.MagicMock()
@@ -214,7 +215,8 @@ def test_keep_extracted_chunks(
214215
/ "archive"
215216
/ "zip"
216217
/ "regular"
217-
/ "__input__/"
218+
/ "__input__"
219+
/ "apple.zip"
218220
)
219221
params = args + ["--extract-dir", str(tmp_path), str(in_path)]
220222

tests/test_handlers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,11 @@
3232
def test_all_handlers(
3333
input_dir: Path, output_dir: Path, extraction_config: ExtractionConfig
3434
):
35-
all_reports = process_file(extraction_config, input_dir)
35+
for input_file in input_dir.iterdir():
36+
reports = process_file(extraction_config, input_file)
37+
check_result(reports)
3638

3739
check_output_is_the_same(output_dir, extraction_config.extract_root)
38-
check_result(all_reports)
3940

4041

4142
@pytest.mark.parametrize(

unblob/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def __init__(
7979
@click.command(help=get_help_text())
8080
@click.argument(
8181
"file",
82-
type=click.Path(path_type=Path, exists=True, resolve_path=True),
82+
type=click.Path(path_type=Path, dir_okay=False, exists=True, resolve_path=True),
8383
required=True,
8484
)
8585
@click.option(

unblob/processing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class ExtractionConfig:
6868

6969
@terminate_gracefully
7070
def process_file(config: ExtractionConfig, path: Path) -> ProcessResult:
71+
if not path.is_file():
72+
raise ValueError("path is not a file", path)
73+
7174
task = Task(
7275
path=path,
7376
depth=0,

0 commit comments

Comments
 (0)