1111from collections import Counter
1212from pathlib import Path
1313
# Keys a decoded log entry must contain; checked by _has_required_keys().
REQUIRED_KEYS = frozenset(
    {"start", "end", "type", "unit", "qty", "price", "groupby"}
)
1517
1618
1719def _is_valid_timestep (s : str ) -> bool :
@@ -20,7 +22,7 @@ def _is_valid_timestep(s: str) -> bool:
2022
2123
def _has_required_keys(obj: dict) -> bool:
    """Return True if obj has all required keys.

    The required set is the module-level REQUIRED_KEYS. Keys in obj beyond
    that set are ignored; only missing keys make the check fail.
    """
    return REQUIRED_KEYS.issubset(obj.keys())
2527
2628
@@ -99,7 +101,7 @@ def _find_arrays_in_text(text: str) -> list[tuple[str, str]]:
99101 if c == "]" :
100102 depth -= 1
101103 if depth == 0 :
102- slice_str = text [start : j + 1 ]
104+ slice_str = text [start : j + 1 ]
103105 try :
104106 arr = json .loads (slice_str )
105107 if (
@@ -110,7 +112,8 @@ def _find_arrays_in_text(text: str) -> list[tuple[str, str]]:
110112 and isinstance (arr [1 ], str )
111113 ):
112114 entry = json .loads (arr [1 ])
113- if isinstance (entry , dict ) and _has_required_keys (entry ):
115+ if (isinstance (entry , dict ) and
116+ _has_required_keys (entry )):
114117 pairs .append ((arr [0 ], arr [1 ]))
115118 except json .JSONDecodeError :
116119 pass
@@ -150,7 +153,8 @@ def extract_and_sort(json_path: Path) -> list[tuple[str, str]]:
150153
151154def main () -> None :
152155 parser = argparse .ArgumentParser (
153- description = "Extract and sort Loki log entries from JSON; print each and log_count."
156+ description = "Extract and sort Loki log entries from JSON; "
157+ "print each and log_count."
154158 )
155159 parser .add_argument (
156160 "-j" , "--json" ,
@@ -162,33 +166,38 @@ def main() -> None:
162166 "-o" , "--output" ,
163167 type = Path ,
164168 default = None ,
165- help = "Output path (default: same as input with _total.yml suffix, e.g. file.json -> file_total.yml )." ,
169+ help = "Output path (default: input with _total.yml suffix)." ,
166170 )
167171 parser .add_argument (
168172 "--debug" ,
169173 type = Path ,
170174 default = None ,
171175 metavar = "DIR" ,
172- help = "Debug directory: create if needed and write timestep list to <input_stem>_diff.txt." ,
176+ help = "Debug dir: create if needed, write timestep list to "
177+ "<input_stem>_diff.txt." ,
173178 )
174179 args = parser .parse_args ()
175180
176181 if not args .json .exists ():
177182 print (f"Error: input file not found: { args .json } " , file = sys .stderr )
178183 sys .exit (1 )
179184
180- output_path = args .output if args .output is not None else (args .json .parent / f"{ args .json .stem } _total.yml" )
185+ output_path = (
186+ args .output if args .output is not None
187+ else (args .json .parent / f"{ args .json .stem } _total.yml" )
188+ )
181189 pairs = extract_and_sort (args .json )
182190 out = output_path .open ("w" , encoding = "utf-8" )
183191
184- # Debug: write timestep list to <input_stem>_diff.txt in --debug directory (skip if "" or null)
192+ # Debug: write timestep list to <input_stem>_diff.txt (skip if "" or null)
185193 _debug_dir = str (args .debug ).strip () if args .debug is not None else ""
186194 if _debug_dir and _debug_dir != "." :
187195 args .debug .mkdir (parents = True , exist_ok = True )
188196 debug_path = args .debug / f"{ args .json .stem } _diff.txt"
189197 with debug_path .open ("w" , encoding = "utf-8" ) as dbg :
190198 for timestep , log_str in pairs :
191- print (json .dumps ([timestep , log_str ], ensure_ascii = False ), file = dbg )
199+ line = json .dumps ([timestep , log_str ], ensure_ascii = False )
200+ print (line , file = dbg )
192201
193202 # Count unique timesteps and entries per timestep
194203 log_count = len (pairs )
@@ -199,7 +208,7 @@ def main() -> None:
199208 if counts and not all (c == metrics_per_step for c in counts ):
200209 metrics_per_step = "ERROR"
201210
202- # Timesteps with error: those whose count differs from the expected (first) count
211+ # Timesteps with error: count differs from expected (first) count
203212 expected_count = counts [0 ] if counts else 0
204213 timesteps_with_error = [
205214 ts for ts , cnt in counts_per_timestep .items ()
@@ -217,27 +226,40 @@ def main() -> None:
217226 time_end_begin = last_entry .get ("start" )
218227 time_end = last_entry .get ("end" )
219228 else :
220- begin_nano = end_nano = time_begin = time_begin_end = time_end_begin = time_end = None
229+ begin_nano = end_nano = time_begin = time_begin_end = None
230+ time_end_begin = time_end = None
221231
try:
    # Write the YAML summary. A value is emitted as "null" only when it is
    # actually None: a plain truthiness test would also turn legitimate falsy
    # values (0, "") into null, silently changing the output.
    # NOTE(review): the leading whitespace inside these literals was collapsed
    # in the diff rendering; the 2/4-space nesting below is reconstructed from
    # the YAML structure (nanosec/begin/end under each *_step) -- confirm the
    # exact widths against the file.
    print("---", file=out)
    print("time:", file=out)
    steps = (
        ("begin_step", begin_nano, time_begin, time_begin_end),
        ("end_step", end_nano, time_end_begin, time_end),
    )
    for step_name, nano, step_begin, step_end in steps:
        print(f"  {step_name}:", file=out)
        print(
            f"    nanosec: {'null' if nano is None else nano}",
            file=out,
        )
        print(
            f"    begin: {'null' if step_begin is None else repr(step_begin)}",
            file=out,
        )
        print(
            f"    end: {'null' if step_end is None else repr(step_end)}",
            file=out,
        )
    print("data_log:", file=out)
    print(f"  total_timesteps: {total_timesteps}", file=out)
    print(f"  metrics_per_step: {metrics_per_step}", file=out)
    print(f"  log_count: {log_count}", file=out)
finally:
    # Always release the output handle, even if a write above fails.
    out.close()
239261
240- # Only errors go to stdout: timestep and its metric count for inconsistent timesteps
262+ # Only errors to stdout: timestep and metric count for inconsistent ones
241263 if metrics_per_step == "ERROR" :
242264 for ts in sorted (timesteps_with_error , key = int ):
243265 print (ts , counts_per_timestep [ts ], file = sys .stdout )
0 commit comments