Skip to content

Commit 60e3d6e

Browse files
committed
existing JSON report output file prevents run without --force
1 parent d13ee37 commit 60e3d6e

File tree

3 files changed

+83
-24
lines changed

3 files changed

+83
-24
lines changed

tests/test_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def test_archive_success(
202202
process_num=expected_process_num,
203203
handlers=BUILTIN_HANDLERS,
204204
)
205-
process_file_mock.assert_called_once_with(config, in_path)
205+
process_file_mock.assert_called_once_with(config, in_path, None)
206206
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)
207207

208208

unblob/cli.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def __init__(
9797
"--force",
9898
is_flag=True,
9999
show_default=True,
100-
help="Force extraction removing previously extracted files.",
100+
help="Force extraction even if outputs already exist (they are removed).",
101101
)
102102
@click.option(
103103
"-d",
@@ -201,17 +201,7 @@ def cli(
201201
)
202202

203203
logger.info("Start processing file", file=file)
204-
results = process_file(config, file)
205-
206-
if report_file:
207-
try:
208-
report_file.write_text(results.to_json())
209-
except IOError as e:
210-
logger.error("Can not write JSON report", path=report_file, msg=str(e))
211-
except Exception:
212-
logger.exception("Can not write JSON report", path=report_file)
213-
else:
214-
logger.info("JSON report written", path=report_file)
204+
results = process_file(config, file, report_file)
215205

216206
return results
217207

unblob/processing.py

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import statistics
44
from operator import attrgetter
55
from pathlib import Path
6-
from typing import Iterable, List
6+
from typing import Iterable, List, Optional
77

88
import attr
99
import plotext as plt
@@ -30,6 +30,7 @@
3030
from .report import (
3131
ExtractDirectoryExistsReport,
3232
FileMagicReport,
33+
Report,
3334
StatReport,
3435
UnknownError,
3536
)
@@ -78,19 +79,36 @@ def get_extract_dir_for(self, path: Path) -> Path:
7879

7980

8081
@terminate_gracefully
def process_file(
    config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
) -> ProcessResult:
    """Process ``input_path`` and optionally write a JSON report of the result.

    Args:
        config: Extraction settings; passed on to the preparation helpers and
            to the actual task processing.
        input_path: The file to process. Must be an existing regular file.
        report_file: Where to write the JSON report, or ``None`` to skip
            report writing.

    Returns:
        - an empty ``ProcessResult`` when the report file is known in advance
          to be unwritable (nothing is processed in that case),
        - a ``ProcessResult`` holding a single ``TaskResult`` with the
          preparation errors when the extraction directory could not be
          prepared,
        - otherwise the result of processing the root task.

    Raises:
        ValueError: If ``input_path`` is not a file.
    """
    task = Task(
        path=input_path,
        depth=0,
    )

    if not input_path.is_file():
        raise ValueError("input_path is not a file", input_path)

    # Check the report file *before* touching the extraction directory:
    # prepare_extract_dir() removes an existing extraction directory when
    # config.force_extract is set, and that must not happen for a run that is
    # about to be aborted because its report can not be written.
    if not prepare_report_file(config, report_file):
        logger.error(
            "File not processed, as report could not be written", file=input_path
        )
        return ProcessResult()

    errors = prepare_extract_dir(config, input_path)
    if errors:
        # Nothing was extracted; the errors are attached to the root task so
        # they still show up in the result (and in the report, if requested).
        process_result = ProcessResult([TaskResult(task, errors)])
    else:
        process_result = _process_task(config, task)

    if report_file:
        write_json_report(report_file, process_result)

    return process_result
109+
110+
111+
def _process_task(config: ExtractionConfig, task: Task) -> ProcessResult:
94112
processor = Processor(config)
95113
aggregated_result = ProcessResult()
96114

@@ -112,21 +130,72 @@ def process_result(pool, result):
112130
return aggregated_result
113131

114132

115-
def prepare_extract_dir(config: ExtractionConfig, input_file: Path) -> List[Report]:
    """Make sure the extraction path for ``input_file`` is free to use.

    The path is obtained from ``config.get_extract_dir_for(input_file)``.
    If it already exists and ``config.force_extract`` is set, it is removed;
    if it exists and ``config.force_extract`` is not set, the problem is
    logged and reported.

    Args:
        config: Extraction settings (provides the extraction path and the
            ``force_extract`` flag).
        input_file: The input file whose extraction path is prepared.

    Returns:
        An empty list when extraction can proceed, or a list with one
        ``ExtractDirectoryExistsReport`` when the path exists and must not be
        removed.
    """
    extract_dir = config.get_extract_dir_for(input_file)
    if not extract_dir.exists():
        return []

    if not config.force_extract:
        logger.error("Extraction directory already exist", path=str(extract_dir))
        return [ExtractDirectoryExistsReport(path=extract_dir)]

    logger.info("Removing extract dir", path=extract_dir)
    if extract_dir.is_dir() and not extract_dir.is_symlink():
        shutil.rmtree(extract_dir)
    else:
        # Path.exists() is also true for regular files and for symlinks with
        # an existing target, but shutil.rmtree() only accepts a real
        # directory and would raise for those; remove them as a single entry.
        extract_dir.unlink()

    return []
128147

129148

149+
def prepare_report_file(config: ExtractionConfig, report_file: Optional[Path]) -> bool:
    """Check up front that the report can be written.

    Doing this before processing avoids discovering an unwritable report only
    after an expensive extraction has already run.

    Args:
        config: Extraction settings; ``config.force_extract`` decides whether
            an already existing report file may be removed.
        report_file: The intended report path, or ``None`` when no report is
            requested.

    Returns:
        ``True`` if no problem is foreseen, ``False`` if report writing is
        known in advance to fail.
    """
    if not report_file:
        # no report requested, so there is nothing that could fail
        return True

    if report_file.exists():
        if not config.force_extract:
            logger.error(
                "Report file exists and --force not specified", path=report_file
            )
            return False

        logger.warning("Removing existing report file", path=report_file)
        try:
            report_file.unlink()
        except OSError as unlink_error:
            logger.error(
                "Can not remove existing report file",
                path=report_file,
                msg=str(unlink_error),
            )
            return False

    # Probe that the report location is writable by creating an empty file
    # and removing it again.
    try:
        report_file.write_text("")
        report_file.unlink()
    except OSError as probe_error:
        logger.error(
            "Can not create report file", path=report_file, msg=str(probe_error)
        )
        return False
    else:
        return True
186+
187+
188+
def write_json_report(report_file: Path, process_result: ProcessResult):
    """Serialize ``process_result`` to JSON and write it to ``report_file``.

    Failures are logged rather than raised: an ``OSError`` is logged as an
    error with its message, any other exception is logged together with its
    traceback. On success an informational message is logged.
    """
    try:
        serialized = process_result.to_json()
        report_file.write_text(serialized)
    except OSError as os_error:
        logger.error(
            "Can not write JSON report", path=report_file, msg=str(os_error)
        )
        return
    except Exception:
        logger.exception("Can not write JSON report", path=report_file)
        return

    logger.info("JSON report written", path=report_file)
197+
198+
130199
class Processor:
131200
def __init__(self, config: ExtractionConfig):
132201
self._config = config

0 commit comments

Comments
 (0)