33import statistics
44from operator import attrgetter
55from pathlib import Path
6- from typing import Iterable , List
6+ from typing import Iterable , List , Optional
77
88import attr
99import plotext as plt
3030from .report import (
3131 ExtractDirectoryExistsReport ,
3232 FileMagicReport ,
33+ Report ,
3334 StatReport ,
3435 UnknownError ,
3536)
@@ -78,19 +79,36 @@ def get_extract_dir_for(self, path: Path) -> Path:
7879
7980
@terminate_gracefully
def process_file(
    config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
) -> ProcessResult:
    """Process ``input_path`` and optionally write a JSON report of the outcome.

    Args:
        config: Extraction settings, passed on to the preparation helpers
            and to the task processing.
        input_path: The file to process; it must be an existing regular file.
        report_file: Where to write the JSON report. When ``None`` no report
            is written.

    Returns:
        An empty ``ProcessResult`` when the report file cannot be prepared,
        a ``ProcessResult`` holding a single ``TaskResult`` with the
        preparation errors when the extract directory is unusable, otherwise
        the result of processing the root task.

    Raises:
        ValueError: If ``input_path`` is not a file.
    """
    if not input_path.is_file():
        raise ValueError("input_path is not a file", input_path)

    task = Task(
        path=input_path,
        depth=0,
    )

    # Validate the report destination *before* touching the extract directory:
    # prepare_extract_dir() may delete an existing extraction, which must not
    # happen when we are going to refuse processing anyway.
    if not prepare_report_file(config, report_file):
        logger.error(
            "File not processed, as report could not be written", file=input_path
        )
        return ProcessResult()

    errors = prepare_extract_dir(config, input_path)
    if errors:
        process_result = ProcessResult([TaskResult(task, errors)])
    else:
        process_result = _process_task(config, task)

    if report_file:
        write_json_report(report_file, process_result)

    return process_result
109+
110+
111+ def _process_task (config : ExtractionConfig , task : Task ) -> ProcessResult :
94112 processor = Processor (config )
95113 aggregated_result = ProcessResult ()
96114
@@ -112,21 +130,72 @@ def process_result(pool, result):
112130 return aggregated_result
113131
114132
def prepare_extract_dir(config: ExtractionConfig, input_file: Path) -> List[Report]:
    """Make sure the extraction directory for ``input_file`` is free to use.

    An already existing directory is removed when ``config.force_extract`` is
    set; otherwise it is reported as an error.

    Returns:
        An empty list when extraction can proceed, or a list with one
        ``ExtractDirectoryExistsReport`` when the directory is in the way.
    """
    target_dir = config.get_extract_dir_for(input_file)

    if not target_dir.exists():
        return []

    if config.force_extract:
        logger.info("Removing extract dir", path=target_dir)
        shutil.rmtree(target_dir)
        return []

    existing_dir_report = ExtractDirectoryExistsReport(path=target_dir)
    logger.error("Extraction directory already exist", path=str(target_dir))
    return [existing_dir_report]
128147
129148
def prepare_report_file(config: ExtractionConfig, report_file: Optional[Path]) -> bool:
    """Check up front that the report can be written.

    Doing this before the extraction avoids discovering an unwritable report
    only after the expensive work has been done.

    Returns:
        True if no problem is foreseen (including when no report is
        requested), False if report writing is already known to fail.
    """
    if not report_file:
        # No report requested, so nothing can go wrong with it.
        return True

    if report_file.exists():
        if not config.force_extract:
            logger.error(
                "Report file exists and --force not specified", path=report_file
            )
            return False

        logger.warning("Removing existing report file", path=report_file)
        try:
            report_file.unlink()
        except OSError as exc:
            logger.error(
                "Can not remove existing report file",
                path=report_file,
                msg=str(exc),
            )
            return False

    # Probe the destination: create an empty file and remove it again, so the
    # report path is left untouched until the real report is written.
    try:
        report_file.write_text("")
        report_file.unlink()
    except OSError as exc:
        logger.error("Can not create report file", path=report_file, msg=str(exc))
        return False

    return True
186+
187+
def write_json_report(report_file: Path, process_result: ProcessResult):
    """Write ``process_result`` as JSON into ``report_file``.

    Failures are logged instead of raised, so a report problem does not
    propagate to the caller.
    """
    try:
        serialized = process_result.to_json()
        report_file.write_text(serialized)
    except OSError as exc:
        logger.error("Can not write JSON report", path=report_file, msg=str(exc))
    except Exception:
        # Anything else (e.g. a serialization failure) is logged with its
        # traceback rather than raised.
        logger.exception("Can not write JSON report", path=report_file)
    else:
        logger.info("JSON report written", path=report_file)
197+
198+
130199class Processor :
131200 def __init__ (self , config : ExtractionConfig ):
132201 self ._config = config
0 commit comments