diff --git a/Makefile b/Makefile index 34126eb9..94eeb3c0 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,10 @@ build: clean-build ## Build wheel file using poetry clean-build: ## clean build artifacts @rm -rf dist +.PHONY: clean-web-results +clean-web-results: ## clean web results + @rm -rf web/content/results + .PHONY: help help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' @@ -76,8 +80,8 @@ results-generation: @poetry run smtcomp export-results-pages data Incremental # @echo "🚀 Generating results to web/content/results for Cloud" # @poetry run smtcomp export-results-pages data Cloud - # @echo "🚀 Generating results to web/content/results for Parallel" - # @poetry run smtcomp export-results-pages data Parallel + @echo "🚀 Generating results to web/content/results for Parallel" + @poetry run smtcomp export-results-pages data Parallel cache: @echo "🚀 Generating cache" diff --git a/README.md b/README.md index 1c58e2ad..8aec0c28 100644 --- a/README.md +++ b/README.md @@ -257,7 +257,7 @@ The step from Single Query can be followed with the model validation results whe Tasks for unsat core validation can be generated by: ``` -smtcomp generate-unsatcore-validation-files ../tmp/execution SCRAMBLER_EXECUTABLE UNSAT_CORE_RESULT_DIR/*/* +smtcomp generate-unsatcore-validation-files ../tmp/execution SCRAMBLER_EXECUTABLE UNSAT_CORE_RESULT_DIR ``` --- diff --git a/data/benchmarks-2025.json.gz b/data/benchmarks-2025.json.gz new file mode 100644 index 00000000..186cb57c Binary files /dev/null and b/data/benchmarks-2025.json.gz differ diff --git a/data/latex-certificates/gen_certificates_ornament.tex b/data/latex-certificates/gen_certificates_ornament.tex index 28800078..22edbce4 100644 --- a/data/latex-certificates/gen_certificates_ornament.tex +++ b/data/latex-certificates/gen_certificates_ornament.tex @@ -136,7 +136,7 @@ \node [circular drop shadow={shadow scale=1.05}, decorate, decoration=zigzag, fill=blue!20,draw,thick,circle,text width=3.5cm,align=center,xshift=2cm] - {\large\textbf{SMT-COMP\\ 2024}}; + {\large\textbf{SMT-COMP\\ 2025}}; \end{tikzpicture} \end{minipage} diff --git a/data/latex-certificates/solvers_pretty_name.csv b/data/latex-certificates/solvers_pretty_name.csv deleted file mode 100644 index 1725a325..00000000 --- a/data/latex-certificates/solvers_pretty_name.csv +++ /dev/null @@ -1,20 +0,0 @@ -Solver Name,Members -Algaroba,3 -Amaya,4 -Bitwuzla,2 -COLIBRI,3 -cvc5,15 -cvc5-cloud,5 -iProver v3.9,8 -OpenSMT,2 -OSTRICH,6 -plat-smt,1 -SMTInterpol,5 -SMT-RAT,3 -SMTS,3 -STP,6 -Yices2,7 -YicesQS,1 -Z3-Noodler,5 -Z3-alpha,5 -Z3-Parti-Z3++,2 diff --git a/data/results-inc-2025.json.gz b/data/results-inc-2025.json.gz new file mode 100644 index 00000000..e3391c51 Binary files /dev/null and b/data/results-inc-2025.json.gz differ diff --git a/data/results-mv-2025.json.gz b/data/results-mv-2025.json.gz new file mode 100644 index 00000000..657679f4 Binary files /dev/null and b/data/results-mv-2025.json.gz differ diff --git a/data/results-parallel-2025.json.gz b/data/results-parallel-2025.json.gz new file mode 100644 index 00000000..640454e8 Binary files /dev/null and b/data/results-parallel-2025.json.gz differ diff --git a/data/results-sq-2025.json.gz b/data/results-sq-2025.json.gz new file mode 100644 index 00000000..ba27f9f9 Binary files /dev/null and b/data/results-sq-2025.json.gz differ diff --git a/data/results-uc-2025.json.gz b/data/results-uc-2025.json.gz new file mode 100644 index 00000000..994813e1 Binary 
files /dev/null and b/data/results-uc-2025.json.gz differ diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py index 0799060f..03e8db08 100644 --- a/smtcomp/benchexec.py +++ b/smtcomp/benchexec.py @@ -24,8 +24,10 @@ def get_suffix(track: defs.Track) -> str: return "_unsatcorevalidation" case defs.Track.SingleQuery: return "" - case _: - raise ValueError("No Cloud or Parallel") + case defs.Track.Parallel: + return "_parallel" + case defs.Track.Cloud: + return "_cloud" def get_xml_name(s: defs.Submission, track: defs.Track, division: defs.Division) -> str: @@ -44,8 +46,9 @@ class CmdTask(BaseModel): taskdirs: List[str] -def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]) -> None: - ymlfile = benchmark.with_suffix(".yml") +def generate_benchmark_yml( + ymlfile: Path, benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path] +) -> None: with ymlfile.open("w") as f: f.write("format_version: '2.0'\n\n") @@ -103,7 +106,9 @@ def generate_tool_modules(s: defs.Submission, cachedir: Path) -> None: generate_tool_module(s, cachedir, False) -def generate_xml(config: defs.Config, cmdtasks: List[CmdTask], file: Path, tool_module_name: str) -> None: +def generate_xml( + config: defs.Config, cmdtasks: List[CmdTask], file: Path, tool_module_name: str, track: defs.Track, test: bool +) -> None: doc, tag, text = Doc().tagtext() doc.asis('') @@ -111,16 +116,28 @@ def generate_xml(config: defs.Config, cmdtasks: List[CmdTask], file: Path, tool_ '' ) + + timelimit = config.timelimit_s_test if test else config.timelimit_s + cpuCores = config.cpuCores_parallel if track == defs.Track.Parallel else config.cpuCores + memlimit = config.memlimit_M_parallel if track == defs.Track.Parallel else config.memlimit_M + + if test: + cpuCores = config.cpuCores_parallel_test if track == defs.Track.Parallel else config.cpuCores_test + memlimit = config.memlimit_M_parallel_test if track == defs.Track.Parallel else config.memlimit_M_test with tag( "benchmark", tool=f"tools.{tool_module_name}", - timelimit=f"{config.timelimit_s * config.cpuCores}s", - walltimelimit=f"{config.timelimit_s}s", - memlimit=f"{config.memlimit_M} MB", - cpuCores=f"{config.cpuCores}", + timelimit=f"{timelimit * cpuCores}s", + walltimelimit=f"{timelimit}s", + memlimit=f"{memlimit} MB", + cpuCores=f"{cpuCores}", ): - with tag("require", cpuModel="Intel Xeon E3-1230 v5 @ 3.40 GHz"): - text() + + if track != defs.Track.Parallel: + # we run the test jobs on different machines (main machines are used) + used_cpuModel = "Intel Core i7" if test else "Intel Xeon E3-1230 v5 @ 3.40 GHz" + with tag("require", cpuModel=used_cpuModel): + text() with tag("resultfiles"): text("**/error.log") @@ -222,7 +239,7 @@ def cmdtask_for_submission( return res -def generate(s: defs.Submission, cachedir: Path, config: defs.Config) -> None: +def generate(s: defs.Submission, cachedir: Path, config: defs.Config, test: bool) -> None: generate_tool_modules(s, cachedir) dst = cachedir / "benchmarks" @@ -241,7 +258,6 @@ def generate(s: defs.Submission, cachedir: Path, config: defs.Config) -> None: # cloud and parallel tracks are not executed via benchexec if target_track in ( defs.Track.Cloud, - defs.Track.Parallel, defs.Track.UnsatCoreValidation, defs.Track.ProofExhibition, ): @@ -258,6 +274,8 @@ def generate(s: defs.Submission, cachedir: Path, config: defs.Config) -> None: cmdtasks=res, file=file, tool_module_name=tool_module_name(s, target_track == defs.Track.Incremental), + track=target_track, + test=test, ) 
generated_divisions.append(division) @@ -273,9 +291,11 @@ def generate(s: defs.Submission, cachedir: Path, config: defs.Config) -> None: out("set -x") out(f"for DIVISION in {division_list}") out(" do\n") - out(f' TARGET="../final_results{track_suffix}/$DIVISION/{tool}"') + out(f' TARGET="../results/results{track_suffix}/$DIVISION/{tool}"') out(" rm -rf $TARGET") out(" mkdir -p $TARGET") + + extra_args = "" out( f" PYTHONPATH=$(pwd) benchexec/contrib/vcloud-benchmark.py run_definitions/{tool}{track_suffix}_$DIVISION.xml --read-only-dir / --overlay-dir . --overlay-dir /home --vcloudClientHeap 500 --vcloudPriority URGENT --cgroupAccess -o $TARGET" ) @@ -319,6 +339,6 @@ def generate_unsatcore_validation(s: defs.Submission, cachedir: Path, config: de out(" rm -rf $TARGET") out(" mkdir -p $TARGET") out( - f" PYTHONPATH=$(pwd) benchexec/contrib/vcloud-benchmark.py run_definitions/{tool}_unsatcorevalidation_$DIVISION.xml --read-only-dir / --overlay-dir . --overlay-dir /home --vcloudClientHeap 500 --vcloudPriority URGENT --cgroupAccess --tryLessMemory -o $TARGET" + f" PYTHONPATH=$(pwd) benchexec/contrib/vcloud-benchmark.py run_definitions/{tool}_unsatcorevalidation_$DIVISION.xml --read-only-dir / --overlay-dir . --overlay-dir /home --vcloudClientHeap 500 --vcloudPriority URGENT --cgroupAccess -o $TARGET" ) out("done") diff --git a/smtcomp/certificates.py b/smtcomp/certificates.py index 590732ab..f5dd1fda 100755 --- a/smtcomp/certificates.py +++ b/smtcomp/certificates.py @@ -10,6 +10,7 @@ import smtcomp.defs as defs import smtcomp.results as results import smtcomp.generate_website_page as page +import smtcomp.submission as submission show_experimental = False @@ -70,11 +71,13 @@ def withtrack(l: list[str], name: str, category: category) -> None: class overall: def __init__(self) -> None: + self.best = category() self.biggest = category() self.largest = category() def latex(self) -> str: l: list[str] = [] + withtrack(l, "Best Overall", self.best) withtrack(l, "Biggest Lead", self.biggest) withtrack(l, "Largest Contribution", self.largest) return ", ".join(l) @@ -83,7 +86,7 @@ def __str__(self) -> str: return self.latex() def isNotEmpty(self) -> bool: - return self.biggest.isNotEmpty() and self.largest.isNotEmpty() + return self.best.isNotEmpty() or self.biggest.isNotEmpty() or self.largest.isNotEmpty() class info: @@ -132,7 +135,7 @@ def __repr__(self) -> str: def update( solvers: defaultdict[str, info], select: Callable[[info, str], None], - podium: page.PodiumDivision | page.PodiumBiggestLead | page.PodiumLargestContribution, + podium: page.PodiumDivision | page.PodiumBestOverall | page.PodiumBiggestLead | page.PodiumLargestContribution, ) -> None: if podium.track == defs.Track.SingleQuery: select(solvers[podium.winner_seq], "sq_seq") @@ -173,12 +176,10 @@ def add_logic(logics: dict[Tuple[str, defs.Track], bool], list: dict[str, int], logics[v, track] = True -def parse_pretty_names(solvers: defaultdict[str, info], pretty_names: Path) -> None: - with open(pretty_names, newline="") as input: - input = csv.DictReader(input) # type: ignore - - for row in input: - solvers[row["Solver Name"]].members = int(row["Members"]) # type: ignore +def process_submissions(solvers: defaultdict[str, info], submissions: list[defs.Submission]) -> None: + for s in submissions: + if s.competitive: + solvers[s.name].members = len(s.contributors) def parse_experimental_division(solvers: Any, experimental_division: Path) -> dict[str, bool]: @@ -192,11 +193,14 @@ def parse_experimental_division(solvers: Any, 
experimental_division: Path) -> di def generate_certificates( - website_results: Path, input_for_certificates: Path, pretty_names: Path, experimental_division: Path + website_results: Path, input_for_certificates: Path, submission_dir: Path, experimental_division: Path ) -> None: solvers: defaultdict[str, info] = defaultdict(info) - parse_pretty_names(solvers, pretty_names) + submissions = [submission.read_submission_or_exit(f) + for f in submission_dir.glob("*.json")] + + process_submissions(solvers, submissions) solvers["-"].members = 0 # Remove experimental division @@ -223,6 +227,8 @@ def generate_certificates( continue case page.PodiumCrossDivision(): match result.root: + case page.PodiumBestOverall(): + update(solvers, (lambda x, k: x.overall.best.update(k, True)), result.root) case page.PodiumBiggestLead(): update(solvers, (lambda x, k: x.overall.biggest.update(k, True)), result.root) case page.PodiumLargestContribution(): diff --git a/smtcomp/defs.py b/smtcomp/defs.py index ebc3cc08..ef550a05 100644 --- a/smtcomp/defs.py +++ b/smtcomp/defs.py @@ -15,6 +15,17 @@ U = TypeVar("U") +baseMapSMTLIB2025 = { + "Bitwuzla-MachBV": "Bitwuzla-MachBV-base", + "Z3-Inc-Z3++": "Z3-Inc-Z3++-base", + "Z3-Noodler-Mocha": "Z3-Noodler-Mocha-base", + "Z3-Owl": "Z3-Owl-base", + "Z3-Noodler": "Z3-Noodler", + "z3siri": "z3siri-base", + "Z3-alpha": "Z3-alpha-base", +} + + class EnumAutoInt(Enum): """ Normal enum with strings, but each enum is associated to an int @@ -135,7 +146,7 @@ def name_is_default_field(cls, data: Any) -> Any: class SolverType(EnumAutoInt): wrapped = "wrapped" - derived = "derived" + derived = "derived" # TODO: put a datatype information on base solver standalone = "Standalone" portfolio = "Portfolio" @@ -1317,6 +1328,7 @@ class Submission(BaseModel, extra="forbid"): website: HttpUrl system_description: HttpUrl solver_type: SolverType + # TODO add field base_solver? participations: Participations seed: int | None = None competitive: bool = True @@ -1325,6 +1337,7 @@ class Submission(BaseModel, extra="forbid"): description="Must be set for the final version of the submission. 
An archive on zenodo is needed in this case.", ) + # TODO: model validator to check the sanity of the new base_solver field @model_validator(mode="after") def check_archive(self) -> Submission: if self.archive is None and not all(p.archive for p in self.participations.root): @@ -1446,13 +1459,20 @@ class Results(BaseModel): ## Parameters that can change each year class Config: __next_id__: ClassVar[int] = 0 - current_year = 2024 + current_year = 2025 oldest_previous_results = 2018 timelimit_s = 60 * 20 + timelimit_s_test = 60 # time limit for test runs memlimit_M = 1024 * 30 + memlimit_M_test = 1024 * 8 # memory limit for test runs + memlimit_M_parallel = 1024 * 1000 + memlimit_M_parallel_test = 1024 * 8 cpuCores = 4 - unsatcore_validation_timelimit_s = 60 * 5 - unsatcore_validation_memlimit_M = 1024 * 30 + cpuCores_test = 2 + cpuCores_parallel = 128 + cpuCores_parallel_test = 8 + unsatcore_validation_timelimit_s = 60 * 20 + unsatcore_validation_memlimit_M = 1024 * 15 unsatcore_validation_cpuCores = 4 min_used_benchmarks = 300 ratio_of_used_benchmarks = 0.5 @@ -1494,22 +1514,21 @@ class Config: removed_benchmarks = [ { - "logic": int(Logic.QF_LIA), - "family": "20210219-Dartagnan/ConcurrencySafety-Main", - "name": "39_rand_lock_p0_vs-O0.smt2", - } # scrambler segfault (perhaps stack limit) + "logic": int(Logic.UFDTNIA), + "family": "20241211-verus/verismo", + "name": "tspec__math__nonlinearverismo_tspec.math.nonlinear.proof_mul_pos_neg_rel._01.smt2", + }, # reported by Mathias Preiner as syntactically invalid + { + "logic": int(Logic.UFDTNIA), + "family": "20241211-verus/verismo", + "name": "tspec__math__nonlinearverismo_tspec.math.nonlinear.proof_div_pos_neg_rel._01.smt2", + }, # reported by Mathias Preiner as syntactically invalid ] """ - Benchmarks to remove before selection (currently just for aws) + Benchmarks to remove before selection """ - removed_results = [ - { - "logic": int(Logic.QF_BV), - "family": "20230221-oisc-gurtner", - "name": "SLL-NESTED-8-32-sp-not-excluded.smt2", - } # wrong status in SMTLIB - ] + removed_results = [] """ Benchmarks to remove after running the solvers. Can be used when the selection has already been done. 
""" @@ -1537,7 +1556,7 @@ def data(self) -> Path: @functools.cached_property def previous_years(self) -> list[int]: - return list(range(self.oldest_previous_results, self.current_year)) + return list(range(self.oldest_previous_results, self.current_year - 1)) @functools.cached_property def previous_results(self) -> list[tuple[int, Path]]: diff --git a/smtcomp/generate_website_page.py b/smtcomp/generate_website_page.py index ce3a5b0c..52a8f68c 100644 --- a/smtcomp/generate_website_page.py +++ b/smtcomp/generate_website_page.py @@ -94,6 +94,8 @@ def page_track_suffix(track: defs.Track) -> str: class PodiumStep(BaseModel): name: str + baseSolver: str + deltaBaseSolver: int competing: str # yes or no errorScore: int correctScore: int @@ -230,7 +232,7 @@ class PodiumLargestContribution(BaseModel): class PodiumCrossDivision(RootModel): - root: PodiumLargestContribution | PodiumBiggestLead = Field(..., discriminator="recognition") + root: PodiumBestOverall | PodiumLargestContribution | PodiumBiggestLead = Field(..., discriminator="recognition") class Podium(RootModel): @@ -241,10 +243,23 @@ def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]: if podium is None: return [] else: - return [ - PodiumStep( + podiums = [] + base_solvers = [] + for s in podium: + cscore = s["correctly_solved_score"] + delta = 0 + derived_solver = defs.baseMapSMTLIB2025.get(s["solver"], "") + if derived_solver != "": + for sprime in podium: + if sprime["solver"] == defs.baseMapSMTLIB2025.get(s["solver"], ""): + delta = cscore - sprime["correctly_solved_score"] + break + + ps = PodiumStep( name=s["solver"], - competing="yes", # TODO + baseSolver=derived_solver, + deltaBaseSolver=delta, + competing="no" if "-base" in s["solver"] else "yes", # TODO: establish s["competing"] errorScore=s["error_score"], correctScore=s["correctly_solved_score"], CPUScore=s["cpu_time_score"], @@ -257,8 +272,13 @@ def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]: timeout=s["timeout"], memout=s["memout"], ) - for s in podium - ] + + if "-base" in s["solver"]: + base_solvers.append(ps) + else: + podiums.append(ps) + + return podiums + base_solvers def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track) -> PodiumDivision: @@ -284,7 +304,7 @@ def get_winner(l: List[dict[str, str]] | None) -> str: steps_seq = podium_steps(d[smtcomp.scoring.Kind.seq.name]) return PodiumDivision( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, divisions=f"divisions_{config.current_year}", participants=f"participants_{config.current_year}", @@ -472,7 +492,7 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str: winner_seq = get_winner(sequential) return PodiumBiggestLead( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -590,7 +610,7 @@ def get_winner( winner_seq = get_winner(sequential, scores, data, track) return PodiumBestOverall( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -685,7 +705,7 @@ def timeScore(vws_step: PodiumStep) -> float: steps_seq = ld[smtcomp.scoring.Kind.seq] return PodiumLargestContribution( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -768,7 +788,6 @@ def largest_contribution( def export_results(config: defs.Config, selection: pl.LazyFrame, results: 
pl.LazyFrame, track: defs.Track) -> None: - page_suffix = page_track_suffix(track) dst = config.web_results diff --git a/smtcomp/incremental_tool.py b/smtcomp/incremental_tool.py index 40de13a5..911a7b14 100644 --- a/smtcomp/incremental_tool.py +++ b/smtcomp/incremental_tool.py @@ -30,21 +30,24 @@ def determine_result(self, run: BaseTool2.Run) -> Any: # type: ignore line = line.strip() if line in ("sat", "unsat"): correct += 1 + if "error" in line.lower(): + status = "ERROR" if line.startswith("WRONG"): return "WRONG" - if returnsignal is None: - status = "DONE" - elif ((returnsignal == 9) or (returnsignal == 15)) and isTimeout: - status = "TIMEOUT" - elif returnsignal == 9: - status = "KILLED BY SIGNAL 9" - elif returnsignal == 6: - status = "ABORTED" - elif returnsignal == 15: - status = "KILLED" - else: - status = "ERROR" + if status is None: + if returnsignal is None: + status = "DONE" + elif ((returnsignal == 9) or (returnsignal == 15)) and isTimeout: + status = "TIMEOUT" + elif returnsignal == 9: + status = "KILLED BY SIGNAL 9" + elif returnsignal == 6: + status = "ABORTED" + elif returnsignal == 15: + status = "KILLED" + else: + status = "ERROR" return f"{status} ({correct} correct)" @@ -69,10 +72,10 @@ def cmdline( # type: ignore assert len(tasks) <= 1, "only one inputfile supported" if options: # executable and options were overridden by the task definition - return [TRACE_EXECUTOR, *options, *tasks] + return [TRACE_EXECUTOR, "--continue-after-unknown", *options, *tasks] else: # using default executable - return [TRACE_EXECUTOR, executable, *tasks] + return [TRACE_EXECUTOR, "--continue-after-unknown", executable, *tasks] def program_files(self, executable: str) -> Any: files = [TRACE_EXECUTOR, executable] + self._program_files_from_executable(executable, self.REQUIRED_PATHS) diff --git a/smtcomp/main.py b/smtcomp/main.py index 3f2e07bd..ccfd9a69 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -103,7 +103,7 @@ def get_contacts(files: list[Path] = typer.Argument(None)) -> None: Find contact from submissions given as arguments """ l = list(map(submission.read_submission_or_exit, files)) - contacts = list(str(c) for c in itertools.chain.from_iterable([s.contacts for s in l])) + contacts = list(set((str(c) for c in itertools.chain.from_iterable([s.contacts for s in l if s.competitive])))) contacts.sort() print("\n".join(contacts)) @@ -166,6 +166,7 @@ def generate_benchexec( timelimit_s: int = defs.Config.timelimit_s, memlimit_M: int = defs.Config.memlimit_M, cpuCores: int = defs.Config.cpuCores, + test: bool = False, ) -> None: """ Generate the benchexec file for the given submissions @@ -180,31 +181,17 @@ def generate_benchexec( (cachedir / "tools").mkdir(parents=True, exist_ok=True) for file in track(files): s = submission.read(str(file)) - smtcomp.benchexec.generate(s, cachedir, config) + smtcomp.benchexec.generate(s, cachedir, config, test) smtcomp.benchexec.generate_unsatcore_validation(s, cachedir, config) @app.command(rich_help_panel=benchexec_panel) -def convert_benchexec_results( - results: Path, -) -> None: +def convert_benchexec_results(results: Path, no_cache: bool = False) -> None: """ Load benchexec results and aggregates results in feather format """ - lf = smtcomp.results.parse_dir(results) - lf.collect().write_ipc(results / "parsed.feather") - - -@app.command(rich_help_panel=benchexec_panel) -def convert_aws_results( - results: Path, -) -> None: - """ - Load aws results in cvs format and aggregates results in feather format - """ - - lf = 
smtcomp.results.parse_aws_csv(results) + lf = smtcomp.results.parse_dir(results, no_cache) lf.collect().write_ipc(results / "parsed.feather") @@ -252,9 +239,8 @@ def store_results( on=["file"], defaults={"logic": -1, "family": "", "name": ""}, ) - .join(removed_results, on=["logic", "family", "name"], how="anti") - .sort("file", "solver") - .collect() + # .join(removed_results, on=["logic", "family", "name"], how="anti") + .sort("file", "solver").collect() ) if len(df) > 0: results_track = defs.Results( @@ -814,11 +800,14 @@ def scramble_aws( dstdir: Path, scrambler: Path, max_workers: int = 8, + test: bool = False, ) -> None: """ Show statistics on the benchmarks selected for aws tracks """ config = defs.Config(data) + if test: + config.seed = 1 smtcomp.scramble_benchmarks.select_and_scramble_aws(config, srcdir, dstdir, scrambler, max_workers) @@ -1035,7 +1024,7 @@ def keyfunc( t2 = t.add(solver) for rid, r, result in rs: stderr = result.stderr.strip().replace("\n", ", ") - basename = smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id) + basename = r.benchmark_yml match result.status: case defs.Answer.Unsat: status = "[red]unsat[/red]" @@ -1060,8 +1049,8 @@ def keyfunc( dst = outdir / f"{rid.solver}.{rid.participation}" dst.mkdir(parents=True, exist_ok=True) filedir = benchmark_files_dir(cachedir, rid.track) - basename = smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id) - basename_model = smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id, suffix="rsmt2") + basename = r.benchmark_yml + basename_model = r.benchmark_yml.replace(".smt2", ".rsmt2") smt2_file = filedir / str(r.logic) / basename (dst / basename).unlink(missing_ok=True) (dst / basename).symlink_to(smt2_file.absolute()) @@ -1070,10 +1059,9 @@ def keyfunc( @app.command() -def generate_unsatcore_validation_files( - cachedir: Path, scrambler: Path, resultdirs: list[Path], max_workers: int = 8 -) -> None: - unsat_core_validation.generate_validation_files(cachedir, resultdirs, scrambler) +def generate_unsatcore_validation_files(cachedir: Path, scrambler: Path, resultdir: Path) -> None: + + unsat_core_validation.generate_validation_files(cachedir, resultdir, scrambler) @app.command() @@ -1144,12 +1132,12 @@ def build_dolmen(data: Path) -> None: def generate_certificates( website_results: Path = Path("web/content/results"), input_for_certificates: Path = Path("data/latex-certificates/input_for_certificates.tex"), - pretty_names: Path = Path("data/latex-certificates/solvers_pretty_name.csv"), + submission_dir: Path = Path("submissions"), experimental_division: Path = Path("data/latex-certificates/experimental.csv"), ) -> None: """ generates the input data for the tex certificate generator. 
""" smtcomp.certificates.generate_certificates( - website_results, input_for_certificates, pretty_names, experimental_division + website_results, input_for_certificates, submission_dir, experimental_division ) diff --git a/smtcomp/model_validation.py b/smtcomp/model_validation.py index aec99638..62eb35a9 100644 --- a/smtcomp/model_validation.py +++ b/smtcomp/model_validation.py @@ -10,6 +10,7 @@ from rich.progress import Progress, TaskID from pydantic import BaseModel, RootModel, Field from typing import Union +import polars as pl import pydantic from smtcomp.unpack import write_cin, read_cin @@ -69,6 +70,7 @@ def check_locally(config: defs.Config, smt2_file: Path, model: str) -> defs.Vali elif ( r.stderr.endswith(b"E:id-def-conflict\n") or r.stderr.endswith(b"E:parsing-error\n") + or r.stderr.endswith(b"E:lexing-error\n") or r.stderr.endswith(b"E:unbound-id\n") or r.stderr.endswith(b"E:undefined-constant\n") ): @@ -87,16 +89,19 @@ def check_result_locally( rid: results.RunId, r: results.Run, model: str, + scramble_mapping: dict[int, int], ) -> defs.Validation: d = resultdir / "model_validation_results" - file_cache = d / f"{str(r.scramble_id)}.json.gz" + file_cache = d / f"{str(r.file)}.json.gz" + scramble_id = scramble_mapping[r.file] + if file_cache.is_file(): return defs.ValidationResult.model_validate_json(read_cin(file_cache)).root else: match r.answer: case defs.Answer.Sat: filedir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, rid.track) - basename = smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id) + basename = smtcomp.scramble_benchmarks.scramble_basename(scramble_id) smt2_file = filedir / str(r.logic) / basename val = check_locally(config, smt2_file, model) case _: @@ -115,9 +120,7 @@ def prepare_model_validation_tasks( ( r.runid, b, - logfiles.get_output( - r.runid, smtcomp.scramble_benchmarks.scramble_basename(b.scramble_id, suffix="yml") - ), + logfiles.get_output(r.runid, b.benchmark_yml), resultdir, ) for r in results.parse_results(resultdir) @@ -130,6 +133,11 @@ def prepare_model_validation_tasks( def check_all_results_locally( config: defs.Config, cachedir: Path, resultdir: Path, executor: ThreadPool, progress: Progress ) -> list[tuple[results.RunId, results.Run, defs.Validation]]: + benchmark_dir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, defs.Track.ModelValidation) + mapping_csv = benchmark_dir / smtcomp.scramble_benchmarks.csv_original_id_name + assert mapping_csv.exists() + scramble_mapping = dict(pl.read_csv(mapping_csv).select("file", "scramble_id").iter_rows()) + raise_stack_limit() l = list( itertools.chain.from_iterable( @@ -143,7 +151,14 @@ def check_all_results_locally( return list( progress.track( executor.imap_unordered( - (lambda v: (v[0], v[1], check_result_locally(config, v[3], cachedir, v[0], v[1], v[2]))), l + ( + lambda v: ( + v[0], + v[1], + check_result_locally(config, v[3], cachedir, v[0], v[1], v[2], scramble_mapping), + ) + ), + l, ), description="Model validation", total=len(l), diff --git a/smtcomp/results.py b/smtcomp/results.py index 14b2444b..8c7a856d 100644 --- a/smtcomp/results.py +++ b/smtcomp/results.py @@ -30,7 +30,7 @@ def mangle(self) -> str: class Run(BaseModel): - scramble_id: int + file: int logic: defs.Logic cputime_s: float """ example: 0.211880968s""" @@ -40,6 +40,8 @@ class Run(BaseModel): """ example: true""" walltime_s: float """ example: 0.21279739192686975s""" + benchmark_yml: str + """ example: 467234_QF_ABVFP_20210211-Vector__RTOS_C_73c22d8f.yml""" # host: str # """ example: 
pontos07""" @@ -103,25 +105,26 @@ def parse_result(s: str) -> defs.Answer: return defs.Answer.Timeout if s.startswith("DONE"): return defs.Answer.Incremental - if s.startswith("OUT OF MEMORY"): + if s.startswith("OUT OF MEMORY") or s.startswith("KILLED BY SIGNAL 9"): return defs.Answer.OOM match s: case "false": return defs.Answer.Unsat case "true": return defs.Answer.Sat - case "unknown": + case "unknown" | "ERROR": return defs.Answer.Unknown - case "OUT OF MEMORY" | "OUT OF JAVA MEMORY": + case "OUT OF MEMORY" | "OUT OF JAVA MEMORY" | "KILLED BY SIGNAL 9": return defs.Answer.OOM case _: raise ValueError(f"Unknown result value {s}") -def convert_run(r: ET.Element) -> Run: +def convert_run(r: ET.Element) -> Run | None: parts = r.attrib["name"].split("/") logic = defs.Logic(parts[-2]) - scramble_id = smtcomp.scramble_benchmarks.unscramble_yml_basename(parts[-1]) + benchmark_yml = parts[-1] + benchmark_file = smtcomp.scramble_benchmarks.unscramble_yml_basename(benchmark_yml) cputime_s: Optional[float] = None memory_B: Optional[int] = None answer: Optional[defs.Answer] = None @@ -140,21 +143,23 @@ def convert_run(r: ET.Element) -> Run: walltime_s = parse_time(value) if cputime_s is None or memory_B is None or answer is None or walltime_s is None: - raise ValueError("xml of results doesn't contains some expected column") + print(f"xml of results doesn't contains some expected column for {r.attrib['name']}") + return None return Run( - scramble_id=scramble_id, + file=benchmark_file, logic=logic, cputime_s=cputime_s, memory_B=memory_B, answer=answer, walltime_s=walltime_s, + benchmark_yml=benchmark_yml, ) def parse_xml(file: Path) -> Results: result = ET.fromstring(read_cin(file)) - runs = list(map(convert_run, result.iterfind("run"))) + runs = list(filter(lambda r: r is not None, map(convert_run, result.iterfind("run")))) return Results(runid=RunId.unmangle(result.attrib["name"]), options=result.attrib["options"], runs=runs) @@ -223,17 +228,17 @@ def get_output(self: "LogFile", r: RunId, basename: str) -> str: return s[index:] -def mv_get_cached_results(resultdir: Path, scramble_id: int) -> defs.Validation | None: +def mv_get_cached_results(resultdir: Path, benchmark_id: int) -> defs.Validation | None: d = resultdir / "model_validation_results" - file_cache = d / f"{str(scramble_id)}.json.gz" + file_cache = d / f"{str(benchmark_id)}.json.gz" if file_cache.is_file(): return defs.ValidationResult.model_validate_json(read_cin(file_cache)).root else: return None -def mv_get_cached_answer(resultdir: Path, scramble_id: int) -> defs.Answer: - val = mv_get_cached_results(resultdir, scramble_id) +def mv_get_cached_answer(resultdir: Path, benchmark_id: int) -> defs.Answer: + val = mv_get_cached_results(resultdir, benchmark_id) if val is None: return defs.Answer.ModelNotValidated else: @@ -251,8 +256,8 @@ def mv_get_cached_answer(resultdir: Path, scramble_id: int) -> defs.Answer: re_inc_time = re.compile(r"^time ([0-9.]*)$", flags=re.MULTILINE) -def inc_get_nb_answers(logfiles: LogFile, runid: RunId, scramble_id: int) -> Tuple[defs.Answer, int, float | None]: - output = logfiles.get_output(runid, smtcomp.scramble_benchmarks.scramble_basename(scramble_id, suffix="yml")) +def inc_get_nb_answers(logfiles: LogFile, runid: RunId, yml_name: str) -> Tuple[defs.Answer, int, float | None]: + output = logfiles.get_output(runid, yml_name) if re_inc_trace_executor_wrong_output.search(output): return (defs.Answer.IncrementalError, 0, None) @@ -291,8 +296,8 @@ def get_unsat_core(output: str) -> UnsatCore | None: return 
items -def uc_get_uc(logfiles: LogFile, runid: RunId, scramble_id: int) -> UnsatCore | None: - output = logfiles.get_output(runid, smtcomp.scramble_benchmarks.scramble_basename(scramble_id, suffix="yml")) +def uc_get_uc(logfiles: LogFile, runid: RunId, yml_name: str) -> UnsatCore | None: + output = logfiles.get_output(runid, yml_name) return get_unsat_core(output) @@ -301,10 +306,10 @@ def to_pl(resultdir: Path, logfiles: LogFile, r: Results) -> pl.LazyFrame: def convert(a: Run) -> Dict[str, Any]: d = dict(a) if r.runid.track == defs.Track.ModelValidation and a.answer == defs.Answer.Sat: - a.answer = mv_get_cached_answer(resultdir, a.scramble_id) + a.answer = mv_get_cached_answer(resultdir, a.file) if r.runid.track == defs.Track.Incremental: - answer, nb_answers, last_time = inc_get_nb_answers(logfiles, r.runid, a.scramble_id) + answer, nb_answers, last_time = inc_get_nb_answers(logfiles, r.runid, a.benchmark_yml) a.answer = answer d["nb_answers"] = nb_answers # TODO: Since we forgot to readd timestamp for some answer @@ -313,10 +318,12 @@ def convert(a: Run) -> Dict[str, Any]: if last_time is not None: d["walltime_s"] = last_time d["cputime_s"] = last_time + else: + d["nb_answers"] = -1 if r.runid.track == defs.Track.UnsatCore: if d["answer"] == defs.Answer.Unsat: - uc = uc_get_uc(logfiles, r.runid, a.scramble_id) + uc = uc_get_uc(logfiles, r.runid, a.benchmark_yml) if uc is None: d["answer"] = defs.Answer.Unknown d["unsat_core"] = [] @@ -330,18 +337,21 @@ def convert(a: Run) -> Dict[str, Any]: # TODO: Since we forgot to readd timestamp for each answer # we don't have the time of the last answer for now # So we take the total for now + else: + d["unsat_core"] = [] d["answer"] = int(d["answer"]) d["logic"] = int(d["logic"]) return d - lf = pl.LazyFrame(map(convert, r.runs)) + # compute the list eagerly to avoid problems with 'infer_schema_length' + lf = pl.LazyFrame(list(map(convert, r.runs))) return lf.with_columns(solver=pl.lit(r.runid.solver), participation=r.runid.participation, track=int(r.runid.track)) -def parse_to_pl(file: Path) -> pl.LazyFrame: +def parse_to_pl(file: Path, no_cache: bool) -> pl.LazyFrame: feather = file.with_suffix(".feather") - if feather.exists(): + if not no_cache and feather.exists(): return pl.read_ipc(feather).lazy() with LogFile(file.parent) as logfiles: @@ -357,17 +367,17 @@ def parse_mapping(p: Path) -> pl.LazyFrame: return pl.LazyFrame( ( ( - smtcomp.scramble_benchmarks.unscramble_yml_basename(Path(k).name), + int(file), sorted(v["core"]), smtcomp.scramble_benchmarks.unscramble_yml_basename(Path(v["file"]).name), ) - for k, l in d.items() - for v in l + for file, cores in d.items() + for v in cores ), { - "scramble_id_orig": pl.Int64, + "orig_file": pl.Int64, "unsat_core": pl.List(pl.Int64), - "scramble_id": pl.Int64, + "file": pl.Int64, }, ) @@ -375,9 +385,9 @@ def parse_mapping(p: Path) -> pl.LazyFrame: json_mapping_name = "mapping.json" -def parse_dir(dir: Path) -> pl.LazyFrame: +def parse_dir(dir: Path, no_cache: bool) -> pl.LazyFrame: """ - output columns: solver, participation, track, basename, cputime_s, memory_B, status, walltime_s, scramble_id, file + output columns: solver, participation, track, basename, cputime_s, memory_B, status, walltime_s, file The track stored in the results is *not* used for some decisions: - if a file mapping.json is present it used and the original_id.csv is not needed @@ -386,48 +396,50 @@ def parse_dir(dir: Path) -> pl.LazyFrame: TODO: streamline the results directory hierarchy """ - csv = dir / 
smtcomp.scramble_benchmarks.csv_original_id_name - json = dir / json_mapping_name - if not csv.exists() and not json.exists(): - raise (ValueError(f"No file {csv!s} or {json!s} in the directory")) - - if csv.exists(): - lf = pl.read_csv(csv).lazy() - defaults: dict[str, Any] = {"file": -1} - else: - lf = parse_mapping(json) - defaults = {"unsat_core": [], "scramble_id_orig": -1} - l = list(dir.glob("**/*.xml.bz2")) if len(l) == 0: raise (ValueError(f"No results in the directory {dir!s}")) - l_parsed = list(track(map(parse_to_pl, l), total=len(l))) + l_parsed = list(track((parse_to_pl(f, no_cache) for f in l), total=len(l))) results = pl.concat(l_parsed) - results = add_columns(results, lf, on=["scramble_id"], defaults=defaults) - ucvr = dir / "../unsat_core_validation_results" / "parsed.feather" + uc_validation_results = dir / "../unsat_core_validation_results" / "parsed.feather" + + json = dir / json_mapping_name + if json.exists(): + # add information about the original benchmark to each UC validation run + lf = parse_mapping(json) + results = add_columns(results.drop("unsat_core"), lf, on=["file"], defaults={"unsat_core": [], "orig_file": -1}) + if (dir.name).endswith("unsatcore"): - if ucvr.is_file(): - vr = pl.read_ipc(ucvr).lazy() + if uc_validation_results.is_file(): + # compute stats of validated and refuted cores + vr = pl.read_ipc(uc_validation_results).lazy() vr = ( - vr.select("answer", "unsat_core", scramble_id="scramble_id_orig") - .group_by("scramble_id", "unsat_core") + vr.select("answer", "unsat_core", file="orig_file") + .group_by("file", "unsat_core") .agg( - sat=(pl.col("answer") == int(defs.Answer.Sat)).count(), - unsat=(pl.col("answer") == int(defs.Answer.Unsat)).count(), + sat=(pl.col("answer") == int(defs.Answer.Sat)).sum(), + unsat=(pl.col("answer") == int(defs.Answer.Unsat)).sum(), validation_attempted=True, ) ) + results = add_columns( results, vr, - on=["scramble_id", "unsat_core"], + on=["file", "unsat_core"], defaults={"sat": 0, "unsat": 0, "validation_attempted": False}, ) + + # change answer according to the validity of the core results = results.with_columns( - answer=pl.when((pl.col("answer") == int(defs.Answer.Unsat)) & (pl.col("sat") > pl.col("unsat"))) - .then(int(defs.Answer.UnsatCoreInvalidated)) - .otherwise("answer") + answer=pl.when((pl.col("answer") == int(defs.Answer.Unsat)) & (pl.col("sat") >= pl.col("unsat"))) + .then( + pl.when(pl.col("sat") == 0) + .then(int(defs.Answer.Unknown)) # sat == unsat == 0 + .otherwise(int(defs.Answer.UnsatCoreInvalidated)) + ) + .otherwise("answer") # sat < unsat ).drop("sat", "unsat", "unsat_core") else: results = results.with_columns(validation_attempted=False) @@ -449,7 +461,7 @@ def helper_get_results( The second value returned is the selection """ - if len(results) == 0: + if results is None or len(results) == 0: lf = ( pl.read_ipc(config.cached_current_results[track]) .lazy() @@ -466,7 +478,8 @@ def helper_get_results( ) else: lf = pl.concat(pl.read_ipc(p / "parsed.feather").lazy() for p in results) - lf = lf.filter(track=int(track)).drop("scramble_id") + lf = lf.drop("logic", "participation") # Hack for participation 0 bug move "participation" to on= for 2025, + lf = lf.drop("benchmark_yml", "unsat_core") selection = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track)) @@ -484,61 +497,9 @@ def helper_get_results( selected = add_columns( selected, - lf.drop("logic", "participation"), # Hack for participation 0 bug move "participation" to on= for 2025 + lf, on=["file", 
"solver", "track"], - defaults={ - "answer": -1, - "cputime_s": 0, - "memory_B": 0, - "walltime_s": 0, - "nb_answers": 0, - }, + defaults={"answer": -1, "cputime_s": 0, "memory_B": 0, "walltime_s": 0, "nb_answers": -1}, ) return selected, selection - - -def parse_aws_csv(dir: Path) -> pl.LazyFrame: - """ - output columns: solver, participation, track, cputime_s, memory_B, status, walltime_s, scramble_id, file, answer - - Assumes that there is a file results.csv in the directory dir. The file - must contain columns: solver, scramble_id, logic, solver_time, file, track, solver_result - """ - - def aws_logic(logic: str) -> int: - return int(defs.Logic(logic)) - - def aws_track(track: str) -> int: - return int(defs.Track(track)) - - def aws_result(res: str) -> int: - match res: - case "unsat": - return int(defs.Answer.Unsat) - case "sat": - return int(defs.Answer.Sat) - case _: - return int(defs.Answer.Unknown) - - csv = dir / "results.csv" - if not csv.exists(): - raise (ValueError(f"results.csv missing in the directory")) - lf = pl.scan_csv(csv).select( - pl.col("solver"), - pl.col("scramble_id"), - pl.col("logic").apply(aws_logic, return_dtype=pl.Int64).alias("logic"), - pl.col("solver_time").alias("walltime_s"), - pl.col("file"), - pl.col("track").apply(aws_track, return_dtype=pl.Int32).alias("track"), - pl.col("solver_result").map_elements(aws_result, return_dtype=pl.Int64).alias("answer"), - ) - - results = lf.with_columns( - participation=pl.lit(0, dtype=pl.Int64), - memory_B=pl.lit(0, dtype=pl.Int64), - nb_answers=pl.lit(0, dtype=pl.Int64), - cputime_s=pl.lit(0, dtype=pl.Int64), - ) - - return results diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py index 302e0cb7..88138dcd 100644 --- a/smtcomp/scramble_benchmarks.py +++ b/smtcomp/scramble_benchmarks.py @@ -21,19 +21,18 @@ def scramble_basename(id: int, suffix: str = "smt2") -> str: def unscramble_yml_basename(basename: str) -> int: - # We are unscrabling scrambled%i.yml - assert basename[0:9] == "scrambled" - # assert basename[-4:] == ".yml" + # We are unscrabling {i}.yml or scrambled{i}_{core}.smt2 if basename[-4:] == ".yml": - return int(basename[9:-4]) + parts = basename.split("_") + file_id = parts[0] + return int(file_id) else: + assert basename[0:9] == "scrambled" assert basename[-5:] == ".smt2" s = basename[9:-5] - l = list(map(int, s.split("_"))) - i = l[0] - if len(l) > 1: - i += l[1] * 10_000_000 # Hack for unsat_core_verification - return i + file_id, core = list(map(int, s.split("_"))) + file_id += core * 10_000_000 # Hack for unsat_core_verification + return file_id def benchmark_files_dir(cachedir: Path, track: defs.Track) -> Path: @@ -58,34 +57,40 @@ def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, ar i = "incremental" else: i = "non-incremental" - fsrc = ( + orig_path = ( srcdir.joinpath(i) .joinpath(str(defs.Logic.of_int(fdict["logic"]))) .joinpath(Path(fdict["family"])) .joinpath(fdict["name"]) ) dstdir = dstdir.joinpath(str(defs.Logic.of_int(fdict["logic"]))) - fdst = dstdir.joinpath(scramble_basename(fdict["scramble_id"])) + scrambled_path = dstdir.joinpath(scramble_basename(fdict["scramble_id"])) dstdir.mkdir(parents=True, exist_ok=True) if incremental: subprocess.run( ["grep", "-o", "(set-info :status \\(sat\\|unsat\\|unknown\\))"], - stdin=fsrc.open("r"), - stdout=fdst.open("w"), + stdin=orig_path.open("r"), + stdout=scrambled_path.open("w"), ) - subprocess.run(["sed", "-i", "s/(set-info :status \\(.*\\))/\\1/", str(fdst)]) - with fdst.open("a") as 
dstfile: + subprocess.run(["sed", "-i", "s/(set-info :status \\(.*\\))/\\1/", str(scrambled_path)]) + with scrambled_path.open("a") as dstfile: dstfile.write("--- BENCHMARK BEGINS HERE ---\n") - subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("a")) + subprocess.run(args, stdin=orig_path.open("r"), stdout=scrambled_path.open("a")) else: try: - subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("w"), check=True) + subprocess.run(args, stdin=orig_path.open("r"), stdout=scrambled_path.open("w"), check=True) except subprocess.CalledProcessError as e: - print(f"[red]Warning[/red] scrambler crashed on {fsrc}") + print(f"[red]Warning[/red] scrambler crashed on {orig_path}") - expected = get_expected_result(fsrc) if not incremental else None - generate_benchmark_yml(fdst, expected, fsrc.relative_to(srcdir)) + expected = get_expected_result(orig_path) if not incremental else None + + mangled_name = "_".join( + [str(fdict["file"]), str(defs.Logic.of_int(fdict["logic"])), fdict["family"].replace("/", "__"), fdict["name"]] + ) + yaml_dst = dstdir.joinpath(mangled_name).with_suffix(".yml") + + generate_benchmark_yml(yaml_dst, scrambled_path, expected, orig_path.relative_to(srcdir)) def create_scramble_id(benchmarks: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame: @@ -177,7 +182,7 @@ def select_and_scramble_aws( all = intersect(solvers, selected, on=["track", "logic"]).collect().lazy() for name, track in [("cloud", defs.Track.Cloud), ("parallel", defs.Track.Parallel)]: - dst = dstdir / name / "non-incremental" + dst = benchmark_files_dir(dstdir, track) dst.mkdir(parents=True, exist_ok=True) scramble_lazyframe( diff --git a/smtcomp/selection.py b/smtcomp/selection.py index 252f439a..af2917fa 100644 --- a/smtcomp/selection.py +++ b/smtcomp/selection.py @@ -89,7 +89,7 @@ def add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFram "result": int(defs.Status.Unknown), "current_result": int(defs.Status.Unknown), }, - ).with_columns(new=pl.col("family").str.starts_with(str(config.current_year))) + ).with_columns(new=pl.col("family").str.starts_with(str(config.current_year - 1))) if config.use_previous_results_for_status: with_info = with_info.with_columns( @@ -174,6 +174,8 @@ def helper_compute_non_incremental(config: defs.Config, track: SimpleNonIncremen Returned columns: file (uniq id), logic, family,name, status, asserts nunmber, trivial, run (in previous year), new (benchmarks), selected """ benchmarks = pl.read_ipc(config.cached_non_incremental_benchmarks).lazy() + benchmarks = benchmarks.join(removed_benchmarks(config), on=["logic", "family", "name"], how="anti") + results = pl.read_ipc(config.cached_previous_results).lazy() match track: @@ -198,7 +200,9 @@ def helper_compute_incremental(config: defs.Config) -> pl.LazyFrame: """ Returned columns: file (uniq id), logic, family,name, status, asserts nunmber, trivial, run (in previous year), new (benchmarks), selected """ - benchmarks = pl.read_ipc(config.cached_incremental_benchmarks) + benchmarks = pl.read_ipc(config.cached_incremental_benchmarks).lazy() + benchmarks = benchmarks.join(removed_benchmarks(config), on=["logic", "family", "name"], how="anti") + results = pl.read_ipc(config.cached_previous_results) benchmarks_with_info = add_trivial_run_info(benchmarks.lazy(), results.lazy(), config) if config.invert_triviality: diff --git a/smtcomp/tool.py b/smtcomp/tool.py index 723cba42..60b43753 100644 --- a/smtcomp/tool.py +++ b/smtcomp/tool.py @@ -47,6 +47,8 @@ def determine_result(self, run: BaseTool2.Run) -> 
Any: # type: ignore return result.RESULT_FALSE_PROP elif line == "sat": return result.RESULT_TRUE_PROP + elif "error" in line.lower(): + return result.RESULT_ERROR elif line == "TIMEOUT": return result.RESULT_TIMEOUT else: diff --git a/smtcomp/unsat_core_validation.py b/smtcomp/unsat_core_validation.py index bfe8ffa3..9c60fabb 100644 --- a/smtcomp/unsat_core_validation.py +++ b/smtcomp/unsat_core_validation.py @@ -6,6 +6,7 @@ import smtcomp.scramble_benchmarks from rich.progress import track import rich +import polars as pl import re from tempfile import NamedTemporaryFile from os.path import splitext @@ -24,7 +25,6 @@ def get_unsat_core(output: str) -> UnsatCore | None: assert len(answers) <= 1, "Multiple unsat cores!" if not answers: - print("No unsat core") return None core = answers[0][0] @@ -50,51 +50,64 @@ def generate_validation_file( rid: results.RunId, r: results.Run, scrambler: Path, - generated_files: defaultdict[Path, dict[FrozenUnsatCore, Path]], + scramble_mapping: dict[int, int], + generated_files: defaultdict[int, dict[FrozenUnsatCore, Path]], + target_dir: Path, ) -> None: assert r.answer == defs.Answer.Unsat filedir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, rid.track) - basename = smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id) + + scramble_id = scramble_mapping[r.file] + basename = smtcomp.scramble_benchmarks.scramble_basename(scramble_id) benchmark_name = Path(str(r.logic)) / basename smt2_file = filedir / benchmark_name - solver_output = logfiles.get_output(rid, smtcomp.scramble_benchmarks.scramble_basename(r.scramble_id, suffix="yml")) + solver_output = logfiles.get_output(rid, r.benchmark_yml) core = get_unsat_core(solver_output) if core is None: + print(f"No unsat core for {r.benchmark_yml}") return - basename_file, basename_ext = splitext(basename) - (cachedir / "benchmarks" / "files_unsatcorevalidation" / str(r.logic)).mkdir(parents=True, exist_ok=True) - outdir = cachedir / "benchmarks" / "files_unsatcorevalidation" + (target_dir / str(r.logic)).mkdir(parents=True, exist_ok=True) frozen_core = tuple(core) - if frozen_core in generated_files[benchmark_name]: + if frozen_core in generated_files[r.file]: return - core_id = len(generated_files[benchmark_name]) + basename_file, basename_ext = splitext(basename) + core_id = len(generated_files[r.file]) validation_file = Path(str(r.logic)) / f"{basename_file}_{core_id}{basename_ext}" - validation_filepath = outdir / validation_file + validation_filepath = target_dir / validation_file create_validation_file(smt2_file, core, scrambler, validation_filepath) - generated_files[benchmark_name][frozen_core] = validation_file + generated_files[r.file][frozen_core] = validation_file -def generate_validation_files(cachedir: Path, resultdirs: list[Path], scrambler: Path) -> None: +def generate_validation_files(cachedir: Path, resultdir: Path, scrambler: Path) -> None: + benchmark_dir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, defs.Track.UnsatCore) target_dir = cachedir / "benchmarks" / "files_unsatcorevalidation" target_dir.mkdir(parents=True, exist_ok=True) - generated_files: defaultdict[Path, dict[FrozenUnsatCore, Path]] = defaultdict(dict) + mapping_csv = benchmark_dir / smtcomp.scramble_benchmarks.csv_original_id_name + assert mapping_csv.exists() + scramble_mapping = dict(pl.read_csv(mapping_csv).select("file", "scramble_id").iter_rows()) + + generated_files: defaultdict[int, dict[FrozenUnsatCore, Path]] = defaultdict(dict) - for resultdir in resultdirs: - 
rich.print(f"[green]Processing[/green] {resultdir}") - with results.LogFile(resultdir) as logfiles: + logfiles = list(resultdir.glob("**/*.logfiles.zip")) + for logfile in logfiles: + resultdir = logfile.parent + rich.print(f"[green]Processing[/green] {logfile}") + with results.LogFile(resultdir) as f: l = [ (r.runid, b) for r in results.parse_results(resultdir) for b in r.runs if b.answer == defs.Answer.Unsat ] - for r in track(l): - generate_validation_file(cachedir, logfiles, r[0], r[1], scrambler, generated_files) + for runid, run in track(l): + generate_validation_file( + cachedir, f, runid, run, scrambler, scramble_mapping, generated_files, target_dir + ) with open(target_dir / "mapping.json", "w") as f: - data = {str(k): [{"core": c, "file": str(f)} for (c, f) in v.items()] for (k, v) in generated_files.items()} + data = {k: [{"core": c, "file": str(f)} for (c, f) in v.items()] for (k, v) in generated_files.items()} json.dump(data, f) diff --git a/submissions/STP-Parti-Bitwuzla-p16.json b/submissions/STP-Parti-Bitwuzla-p16.json index e343e55e..79c8df9e 100644 --- a/submissions/STP-Parti-Bitwuzla-p16.json +++ b/submissions/STP-Parti-Bitwuzla-p16.json @@ -1,5 +1,5 @@ { - "name": "STP-Parti-Bitwuzla", + "name": "STP-Parti-Bitwuzla-16core", "contributors": [ "Mengyu Zhao", "Zhenghang Xu", diff --git a/submissions/STP-Parti-Bitwuzla-p32.json b/submissions/STP-Parti-Bitwuzla-p32.json index e72bf73b..e806cc64 100644 --- a/submissions/STP-Parti-Bitwuzla-p32.json +++ b/submissions/STP-Parti-Bitwuzla-p32.json @@ -1,5 +1,5 @@ { - "name": "STP-Parti-Bitwuzla", + "name": "STP-Parti-Bitwuzla-64core", "contributors": [ "Mengyu Zhao", "Zhenghang Xu", diff --git a/submissions/STP-Parti-Bitwuzla-p8.json b/submissions/STP-Parti-Bitwuzla-p8.json index f314ddfe..8e8f3436 100644 --- a/submissions/STP-Parti-Bitwuzla-p8.json +++ b/submissions/STP-Parti-Bitwuzla-p8.json @@ -1,5 +1,5 @@ { - "name": "STP-Parti-Bitwuzla", + "name": "STP-Parti-Bitwuzla-8core", "contributors": [ "Mengyu Zhao", "Zhenghang Xu", diff --git a/submissions/Z3-Parti-Z3pp-p16.json b/submissions/Z3-Parti-Z3pp-p16.json index 306bbfb8..ed933335 100644 --- a/submissions/Z3-Parti-Z3pp-p16.json +++ b/submissions/Z3-Parti-Z3pp-p16.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp", + "name": "Z3-Parti-Z3pp-16core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/Z3-Parti-Z3pp-p32.json b/submissions/Z3-Parti-Z3pp-p32.json index 2779c767..edc79836 100644 --- a/submissions/Z3-Parti-Z3pp-p32.json +++ b/submissions/Z3-Parti-Z3pp-p32.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp", + "name": "Z3-Parti-Z3pp-32core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/Z3-Parti-Z3pp-p8.json b/submissions/Z3-Parti-Z3pp-p8.json index b347c9f1..451aedd8 100644 --- a/submissions/Z3-Parti-Z3pp-p8.json +++ b/submissions/Z3-Parti-Z3pp-p8.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp", + "name": "Z3-Parti-Z3pp-8core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/bitwuzla_parallel_16.json b/submissions/bitwuzla_parallel_16.json index 02a0abdb..f2b6c74e 100644 --- a/submissions/bitwuzla_parallel_16.json +++ b/submissions/bitwuzla_parallel_16.json @@ -1,5 +1,5 @@ { - "name": "Bitwuzla", + "name": "Bitwuzla-16core", "contributors": ["Aina Niemetz", "Mathias Preiner"], "contacts": ["Mathias Preiner "], "final" : true, diff --git a/submissions/bitwuzla_parallel_32.json b/submissions/bitwuzla_parallel_32.json index 06113d44..20bb9a0a 100644 --- a/submissions/bitwuzla_parallel_32.json +++ 
b/submissions/bitwuzla_parallel_32.json @@ -1,5 +1,5 @@ { - "name": "Bitwuzla", + "name": "Bitwuzla-32core", "contributors": ["Aina Niemetz", "Mathias Preiner"], "contacts": ["Mathias Preiner "], "final" : true, diff --git a/web/themes/smtcomp/assets/css/main.css b/web/themes/smtcomp/assets/css/main.css index 6d9216c3..8203b2d2 100644 --- a/web/themes/smtcomp/assets/css/main.css +++ b/web/themes/smtcomp/assets/css/main.css @@ -593,7 +593,7 @@ td.left { } td.non-competing-grey { - color: #707070; + color: gray; a { color: #808080; } diff --git a/web/themes/smtcomp/layouts/_default/result.html b/web/themes/smtcomp/layouts/_default/result.html index 8e1e5a4f..f23861e7 100644 --- a/web/themes/smtcomp/layouts/_default/result.html +++ b/web/themes/smtcomp/layouts/_default/result.html @@ -6,6 +6,9 @@ {{ $trueDivision := and (isset .Params "logics") (.Params.logics | len | ne 0) }} {{ $prettyTrack := index .Site.Data.pretty_names.track .Params.track }} + {{ $hasBaseSolvers := false }} + {{ $hasNoncompetingSolvers := false }} +

{{ .Params.division }} ({{ $prettyTrack }})

Competition results for the {{ .Params.division }}
@@ -88,30 +91,61 @@

{{ index $category_names $cat }} Performance

{{ range $solver := . }} - - + +{{ if eq $solver.competing "no" }} + {{ $.Scratch.Set "hasNonCompetingSolvers" true }} +{{ end }} + + + + {{ $solver.name }} {{ if eq $solver.competing "no" }}n{{ end }} - {{ $solver.errorScore }} + {{ $solver.errorScore }} {{ if $solver.errorFootnote }} * {{ end }} - {{ $solver.correctScore }} - {{ $solver.CPUScore }} - {{ $solver.WallScore }} - - {{ $solver.solved }} - {{ $solver.solved_sat }} - {{ $solver.solved_unsat }} - {{ $solver.unsolved }} - {{ $solver.abstained }} - - {{ $solver.timeout }} - {{ $solver.memout }} + + {{ $solver.correctScore }} + {{ if not (eq $solver.baseSolver "") }} + {{ $.Scratch.Set "hasBaseSolvers" true }} + {{ if not (lt $solver.deltaBaseSolver 0) }} +
(base +{{ $solver.deltaBaseSolver }}) + {{ else }} +
(base {{ $solver.deltaBaseSolver }}) + {{ end }} + {{ end }} + + {{ $solver.CPUScore }} + {{ $solver.WallScore }} + + {{ $solver.solved }} + {{ $solver.solved_sat }} + {{ $solver.solved_unsat }} + {{ $solver.unsolved }} + {{ $solver.abstained }} + + {{ $solver.timeout }} + {{ $solver.memout }} {{ end }} + +{{ if ($.Scratch.Get "hasBaseSolvers") }} + + (base +/- n): for derived solvers, the increment over the base solver
+
+{{ end }} + +{{ if ($.Scratch.Get "hasNonCompetingSolvers") }} + + n: non-competing solver + +{{ end }} + + + {{ end }} {{ end }}
diff --git a/web/themes/smtcomp/layouts/_default/results.html b/web/themes/smtcomp/layouts/_default/results.html
index cfbd8812..33ac64b4 100644
--- a/web/themes/smtcomp/layouts/_default/results.html
+++ b/web/themes/smtcomp/layouts/_default/results.html
@@ -15,7 +15,7 @@
 "UnsatCore" "unsat-core" }} -

SMT-COMP 2024 Results

+

SMT-COMP 2025 Results

{{ if isset $data "processed" }} The processed data are available in the GitHub repository. diff --git a/web/themes/smtcomp/layouts/_default/results_summary.html b/web/themes/smtcomp/layouts/_default/results_summary.html index 0718a9ff..09c75760 100644 --- a/web/themes/smtcomp/layouts/_default/results_summary.html +++ b/web/themes/smtcomp/layouts/_default/results_summary.html @@ -25,7 +25,7 @@ {{ $categories_pretty := .Site.Data.pretty_names.performance }} -

SMT-COMP 2024 Results - {{ $prettyTrack }}

+

SMT-COMP 2025 Results - {{ $prettyTrack }}

Summary of all competition results for the {{ $prettyTrack }}.