|
| 1 | +import math |
1 | 2 | import functools, itertools |
2 | | -from typing import Set, Dict, Optional, cast, List, DefaultDict |
| 3 | +from collections import defaultdict |
| 4 | +from typing import Set, Dict, Optional, cast, List, DefaultDict, Tuple |
3 | 5 | from pathlib import Path, PurePath |
4 | 6 | from smtcomp import defs |
5 | 7 | from rich import progress |
@@ -139,6 +141,38 @@ class PodiumSummaryResults(BaseModel): |
139 | 141 | layout: Literal["results_summary"] = "results_summary" |
140 | 142 |
|
141 | 143 |
|
class PodiumStepOverallScore(BaseModel):
    """One solver's per-division contribution to the best-overall score."""

    # Solver name as shown on the podium page.
    name: str
    contribution: float_6dig  # nn_D * log10 N_D
    # Division in which this contribution was earned.
    division: str
    # Accumulated time score used to break ties between equal contributions
    # (lower is better — the winner selection maximizes (score, -time)).
    tieBreakTimeScore: float_6dig
| 150 | + |
class PodiumBestOverall(BaseModel):
    """Rendered page data for the "best overall solver" recognition (rules section 7.3.1)."""

    resultdate: str  # date string stamped on the results page
    year: int  # competition year (config.current_year)
    results: str  # results data-set name, e.g. f"results_{year}"
    participants: str  # participants data-set name, e.g. f"participants_{year}"
    track: track_name
    recognition: Literal["best_overall"] = "best_overall"
    # Winner name per scoring kind ("-" when the kind does not apply to the track).
    winner_seq: str
    winner_par: str
    winner_sat: str
    winner_unsat: str
    winner_24s: str
    # Winning solver's aggregated overall score per scoring kind.
    winner_seq_score: float_6dig
    winner_par_score: float_6dig
    winner_sat_score: float_6dig
    winner_unsat_score: float_6dig
    winner_24s_score: float_6dig
    # Full ranked contribution lists (one entry per solver per division).
    sequential: list[PodiumStepOverallScore]
    parallel: list[PodiumStepOverallScore]
    sat: list[PodiumStepOverallScore]
    unsat: list[PodiumStepOverallScore]
    twentyfour: list[PodiumStepOverallScore]
    layout: Literal["result_comp"] = "result_comp"
| 174 | + |
| 175 | + |
142 | 176 | class PodiumStepBiggestLead(BaseModel): |
143 | 177 | name: str |
144 | 178 | second: str |
@@ -367,7 +401,7 @@ def get_kind(a: PodiumDivision, k: smtcomp.scoring.Kind) -> list[PodiumStep]: |
367 | 401 |
|
368 | 402 |
|
369 | 403 | # Computes the new global ranking based on the distance between the winner of a |
370 | | -# division and the second solver in a division as defined in secion 7.3.1 of |
| 404 | +# division and the second solver in a division as defined in section 7.3.1 of |
371 | 405 | # the SMT-COMP'19 rules. |
372 | 406 | # |
373 | 407 | # data : The podium as returned by process_csv. |
@@ -456,6 +490,130 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str: |
456 | 490 | ) |
457 | 491 |
|
458 | 492 |
|
# Compute normalized correctness score
#
# normalized correctness score: nnD = (nD / ND)**2 if eD == 0
#                                   = -2           otherwise
def normalized_correctness_score(
    data: dict[str, PodiumDivision], scores: pl.LazyFrame, track: defs.Track, k: smtcomp.scoring.Kind
) -> list[PodiumStepOverallScore]:
    """Compute each solver's per-division contribution to the best-overall score.

    For every competitive division (more than one participating solver), each
    solver contributes nn_D * log10(N_D), where nn_D is the normalized
    correctness score above and N_D the division size from get_N_D.

    Returns the contributions ranked best-first: higher contribution wins,
    ties broken by the SMALLER tie-break time score (consistent with the
    winner selection in best_overall_ranking, which maximizes (score, -time)).
    """
    podium_steps: list[PodiumStepOverallScore] = []

    for division, div_data in data.items():
        solvers_in_div = get_kind(div_data, k)
        # A division with at most one solver is non-competitive and skipped.
        if len(solvers_in_div) <= 1:
            continue

        N_D = get_N_D(scores, data, division, track)
        if N_D <= 0:
            # No benchmarks in this division: nothing to normalize against
            # (also avoids a ZeroDivisionError / log10 domain error below).
            continue
        # log10(N_D) is loop-invariant; hoist it out of the per-solver loop.
        log_N_D = math.log10(N_D)

        for solver in solvers_in_div:
            # nn_D = (n_D / N_D)**2 for error-free solvers, -2 otherwise.
            nn_D = (solver.correctScore / N_D) ** 2 if solver.errorScore == 0 else -2.0
            podium_steps.append(
                PodiumStepOverallScore(
                    name=solver.name,
                    contribution=nn_D * log_N_D,
                    # Sequential scoring tie-breaks on CPU time, others on wall time.
                    tieBreakTimeScore=solver.CPUScore if k == smtcomp.scoring.Kind.seq else solver.WallScore,
                    division=division,
                )
            )

    # Best contribution first; equal contributions ranked by lower time.
    podium_steps.sort(key=lambda s: (-s.contribution, s.tieBreakTimeScore))
    return podium_steps
| 525 | + |
| 526 | + |
# N_D := total number of check sats in division D if Incremental Track
#        total number of asserts in division D if Unsat Core Track
#        total number of benchmarks otherwise
def get_N_D(scores: pl.LazyFrame, data: dict[str, PodiumDivision], division: str, track: defs.Track) -> int:
    """Return the division-size measure N_D used by the best-overall score.

    Incremental and UnsatCore tracks sum a per-benchmark column over the
    division's unique files; every other track uses the benchmark count
    already recorded on the division's podium data.
    """
    # The two aggregating tracks differ only in which per-file column is summed.
    summed_column = {
        defs.Track.Incremental: "check_sats",
        defs.Track.UnsatCore: "asserts",
    }.get(track)

    if summed_column is None:
        return data[division].n_benchmarks

    return int(
        scores.unique(["division", "file"])  # one row per benchmark file
        # Filter to the requested division before aggregating (avoids summing
        # every division only to keep one).
        .filter(pl.col("division") == int(defs.Division[division]))
        .select(pl.col(summed_column).sum().alias("total"))
        .collect()["total"][0]
    )
| 550 | + |
| 551 | + |
# Computes the best overall ranking as specified in Section 7.3.1 of
# SMT-COMP 2025 rules. I.e:
#
# normalized correctness score: nnD = (nD /ND)**2 if eD == 0
#                                   = -2 otherwise
# overall score : sum_D nnD*log10 ND
#
# For the choices, see the footnote in the rules.
#
def best_overall_ranking(
    config: defs.Config, scores: pl.LazyFrame, data: dict[str, PodiumDivision], track: defs.Track
) -> PodiumBestOverall:
    """Build the best-overall podium page for one track.

    Computes per-division contributions for every scoring kind, aggregates
    them per solver, and reports the winner (and winning score) of each kind.
    """

    def get_winner(l: Optional[List[PodiumStepOverallScore]]) -> Tuple[str, float]:
        # Sum each solver's contributions over all divisions; the winner
        # maximizes the total score, with ties broken by the smaller
        # accumulated tie-break time.
        if l is None or not l:
            return ("-", 0.0)
        podium: DefaultDict[str, Dict[str, float]] = defaultdict(lambda: {"score": 0.0, "tie_break_time": 0.0})
        for entry in l:
            podium[entry.name]["score"] += entry.contribution
            podium[entry.name]["tie_break_time"] += entry.tieBreakTimeScore
        winner, winner_data = max(podium.items(), key=lambda item: (item[1]["score"], -item[1]["tie_break_time"]))
        return (winner, winner_data["score"])

    sequential = normalized_correctness_score(data, scores, track, smtcomp.scoring.Kind.seq)
    parallel = normalized_correctness_score(data, scores, track, smtcomp.scoring.Kind.par)
    sat = normalized_correctness_score(data, scores, track, smtcomp.scoring.Kind.sat)
    unsat = normalized_correctness_score(data, scores, track, smtcomp.scoring.Kind.unsat)
    twentyfour = normalized_correctness_score(data, scores, track, smtcomp.scoring.Kind.twentyfour)

    # Cloud and Parallel tracks have no sequential scoring; clearing the list
    # makes get_winner return the ("-", 0.0) placeholder.
    if track in (defs.Track.Cloud, defs.Track.Parallel):
        sequential = []

    # Compute each winner exactly once (name and score come from one call).
    winner_seq = get_winner(sequential)
    winner_par = get_winner(parallel)
    winner_sat = get_winner(sat)
    winner_unsat = get_winner(unsat)
    winner_24s = get_winner(twentyfour)

    return PodiumBestOverall(
        # NOTE(review): date is hard-coded while year tracks the config — confirm intended.
        resultdate="2024-07-08",
        year=config.current_year,
        track=track,
        results=f"results_{config.current_year}",
        participants=f"participants_{config.current_year}",
        recognition="best_overall",
        winner_seq=winner_seq[0],
        winner_par=winner_par[0],
        winner_sat=winner_sat[0],
        winner_unsat=winner_unsat[0],
        winner_24s=winner_24s[0],
        winner_seq_score=winner_seq[1],
        winner_par_score=winner_par[1],
        winner_sat_score=winner_sat[1],
        winner_unsat_score=winner_unsat[1],
        winner_24s_score=winner_24s[1],
        sequential=sequential,
        parallel=parallel,
        sat=sat,
        unsat=unsat,
        twentyfour=twentyfour,
    )
| 615 | + |
| 616 | + |
459 | 617 | def largest_contribution_ranking( |
460 | 618 | config: defs.Config, |
461 | 619 | virtual_datas: Dict[str, PodiumDivision], |
@@ -634,6 +792,9 @@ def export_results(config: defs.Config, selection: pl.LazyFrame, results: pl.Laz |
634 | 792 | all_divisions.append(data) |
635 | 793 |
|
636 | 794 | if for_division: |
| 795 | + data_best_overall = best_overall_ranking(config, scores, datas, track) |
| 796 | + (dst / f"best-overall-{page_suffix}.md").write_text(data_best_overall.model_dump_json(indent=1)) |
| 797 | + |
637 | 798 | bigdata = biggest_lead_ranking(config, datas, track) |
638 | 799 | (dst / f"biggest-lead-{page_suffix}.md").write_text(bigdata.model_dump_json(indent=1)) |
639 | 800 |
|
|
0 commit comments