@@ -79,7 +79,7 @@ def load_overrides(overrides_dir: Path, lang: str) -> dict[str, str]:
7979
8080
8181def build_universal_rules (
82- classifications_dir : Path , holdout_lang : str
82+ classifications_dir : Path , holdout_lang : str ,
8383) -> tuple [dict [str , str ], dict [str , str ], dict [str , str ]]:
8484 """Build universal rules excluding the holdout language.
8585
@@ -183,7 +183,7 @@ def classify_thing_universal(
183183
184184
185185def evaluate_holdout (
186- classifications_dir : Path , holdout_lang : str , * , use_overrides : bool = False
186+ classifications_dir : Path , holdout_lang : str , * , use_overrides : bool = False ,
187187) -> dict [str , Any ]: # sourcery skip: low-code-quality
188188 """Run holdout evaluation for a single language."""
189189
@@ -197,7 +197,7 @@ def evaluate_holdout(
197197
198198 # Build universal rules excluding this language
199199 exact_rules , majority_rules , category_rules = build_universal_rules (
200- classifications_dir , holdout_lang
200+ classifications_dir , holdout_lang ,
201201 )
202202
203203 # Load overrides if requested
@@ -226,7 +226,7 @@ def evaluate_holdout(
226226 continue
227227
228228 predicted , method = classify_thing_universal (
229- entry , exact_rules , majority_rules , category_rules , overrides
229+ entry , exact_rules , majority_rules , category_rules , overrides ,
230230 )
231231 method_counts [method ] += 1
232232
@@ -303,20 +303,20 @@ def evaluate_holdout(
303303
304304
305305def print_summary_table (
306- label : str , results : list [dict [str , Any ]]
306+ label : str , results : list [dict [str , Any ]],
307307) -> tuple [float , float , float , float ]:
308308 """Print a summary table and return averages."""
309309 w = max (12 , max ((len (r ["language" ]) for r in results ), default = 12 ) + 2 )
310310 print (
311- f"\n { 'Language' :<{w }s} { 'Coverage' :>8s} { 'Accuracy' :>8s} { 'Overall' :>8s} { 'Tier-Wtd' :>8s} { 'Uncls' :>5s} { 'Wrong' :>5s} "
311+ f"\n { 'Language' :<{w }s} { 'Coverage' :>8s} { 'Accuracy' :>8s} { 'Overall' :>8s} { 'Tier-Wtd' :>8s} { 'Uncls' :>5s} { 'Wrong' :>5s} " ,
312312 )
313313 print (f" { '─' * w } { '─' * 8 } { '─' * 8 } { '─' * 8 } { '─' * 8 } { '─' * 5 } { '─' * 5 } " )
314314 for r in results :
315315 ovr = f" (+{ r ['override_count' ]} )" if r .get ("override_count" ) else ""
316316 print (
317317 f" { r ['language' ]:<{w }s} { r ['coverage_pct' ]:>7.1f} % { r ['accuracy_pct' ]:>7.1f} % "
318318 f"{ r ['overall_accuracy_pct' ]:>7.1f} % { r ['tier_weighted_accuracy_pct' ]:>7.1f} % "
319- f"{ r ['unclassified' ]:>5d} { r ['incorrect' ]:>5d} { ovr } "
319+ f"{ r ['unclassified' ]:>5d} { r ['incorrect' ]:>5d} { ovr } " ,
320320 )
321321
322322 avg_cov = sum (r ["coverage_pct" ] for r in results ) / len (results )
@@ -326,7 +326,7 @@ def print_summary_table(
326326 print (f" { '─' * w } { '─' * 8 } { '─' * 8 } { '─' * 8 } { '─' * 8 } { '─' * 5 } { '─' * 5 } " )
327327 print (
328328 f" { 'AVERAGE' :<{w }s} { avg_cov :>7.1f} % { avg_acc :>7.1f} % "
329- f"{ avg_ovr :>7.1f} % { avg_tier :>7.1f} %"
329+ f"{ avg_ovr :>7.1f} % { avg_tier :>7.1f} %" ,
330330 )
331331 return avg_cov , avg_acc , avg_ovr , avg_tier
332332
@@ -343,7 +343,7 @@ def main() -> int: # sourcery skip: low-code-quality
343343
344344 parser = argparse .ArgumentParser (description = "Holdout evaluation for language classifications" )
345345 parser .add_argument (
346- "--all" , action = "store_true" , help = "Evaluate ALL languages (not just holdout set)"
346+ "--all" , action = "store_true" , help = "Evaluate ALL languages (not just holdout set)" ,
347347 )
348348 parser .add_argument ("--lang" , nargs = "+" , help = "Evaluate specific language(s)" )
349349 args = parser .parse_args ()
@@ -406,7 +406,7 @@ def main() -> int: # sourcery skip: low-code-quality
406406 print (
407407 f" { lang :<14s} overall={ result ['overall_accuracy_pct' ]:5.1f} % "
408408 f"({ result ['correct' ]} /{ result ['total_things' ]} correct, "
409- f"{ result ['unclassified' ]} uncls, { result ['incorrect' ]} wrong)"
409+ f"{ result ['unclassified' ]} uncls, { result ['incorrect' ]} wrong)" ,
410410 )
411411
412412 _display_phase_intro ("\n " , "PHASE 1 SUMMARY" )
@@ -442,7 +442,7 @@ def main() -> int: # sourcery skip: low-code-quality
442442 f" { lang :<14s} overall={ result ['overall_accuracy_pct' ]:5.1f} % "
443443 f"({ result ['correct' ]} /{ result ['total_things' ]} correct, "
444444 f"{ result ['unclassified' ]} uncls, { result ['incorrect' ]} wrong)"
445- f"{ marker } "
445+ f"{ marker } " ,
446446 )
447447
448448 _display_phase_intro ("\n " , "PHASE 2 SUMMARY" )
@@ -456,7 +456,7 @@ def main() -> int: # sourcery skip: low-code-quality
456456 _display_phase_intro ("\n \n " , "COMPARISON: Baseline vs With Overrides" )
457457 w = max (12 , max ((len (r ["language" ]) for r in baseline_results ), default = 12 ) + 2 )
458458 print (
459- f"\n { 'Language' :<{w }s} { 'Baseline' :>8s} { 'Override' :>8s} { 'Delta' :>7s} { 'Override Lines' :>14s} "
459+ f"\n { 'Language' :<{w }s} { 'Baseline' :>8s} { 'Override' :>8s} { 'Delta' :>7s} { 'Override Lines' :>14s} " ,
460460 )
461461 print (f" { '─' * w } { '─' * 8 } { '─' * 8 } { '─' * 7 } { '─' * 14 } " )
462462
@@ -469,7 +469,7 @@ def main() -> int: # sourcery skip: low-code-quality
469469 print (
470470 f" { b ['language' ]:<{w }s} { b ['overall_accuracy_pct' ]:>7.1f} % "
471471 f"{ o ['overall_accuracy_pct' ]:>7.1f} % { sign } { delta :>5.1f} % "
472- f"{ ovr_count :>14d} "
472+ f"{ ovr_count :>14d} " ,
473473 )
474474
475475 delta_overall = avg_o [2 ] - avg_b [2 ]
@@ -493,7 +493,7 @@ def main() -> int: # sourcery skip: low-code-quality
493493 total_overrides = sum (r ["override_count" ] for r in override_results )
494494 langs_with_overrides = sum (r ["override_count" ] > 0 for r in override_results )
495495 print (
496- f" Override cost: { total_overrides } lines across { langs_with_overrides } files"
496+ f" Override cost: { total_overrides } lines across { langs_with_overrides } files" ,
497497 )
498498
499499 if final_avg [2 ] >= 95 :
0 commit comments