diff --git a/smtcomp/generate_graphics.py b/smtcomp/generate_graphics.py index cdaaea38..2c19d993 100644 --- a/smtcomp/generate_graphics.py +++ b/smtcomp/generate_graphics.py @@ -13,6 +13,8 @@ from smtcomp.utils import * import smtcomp.generate_website_page import frontmatter +from random import Random +import math c_file = pl.col("file") c_logic = pl.col("logic") @@ -28,6 +30,48 @@ c_bucket2 = pl.col("bucket2") +def correlation_sorting(solvers: List[U], corrs: Mapping[Tuple[U, U], float], nb_iteration: int) -> None: + """ + Strangely I can't find easy to use lib creating a well clustered correlation matrix. block modelling. + + We use simulated annealing + """ + if len(solvers) <= 2: + return + + r = Random(0) + + def neighbor(i: int) -> float: + s = 0.0 + n = 0 + if 0 < i: + s += 1 - corrs[solvers[i - 1], solvers[i]] + n += 1 + if i < len(solvers) - 1: + s += 1 - corrs[solvers[i], solvers[i + 1]] + n += 1 + # At the bounds there is only one neighbor + return s / n + + def swap(i: int, j: int) -> None: + tmp = solvers[i] + solvers[i] = solvers[j] + solvers[j] = tmp + + for i in range(0, nb_iteration): + a = r.randint(0, len(solvers) - 1) + b = r.randint(0, len(solvers) - 2) + if a <= b: + b += 1 + s1 = neighbor(a) + neighbor(b) + swap(a, b) + s2 = neighbor(a) + neighbor(b) + swap(a, b) + t = 1 - (i / (nb_iteration + 1)) + if s2 < s1 or math.exp((s1 - s2) / t) > r.uniform(0.0, 1.0): + swap(a, b) + + def create_output( config: defs.Config, results: pl.LazyFrame, @@ -56,6 +100,7 @@ def create_output( results.group_by(c_solver, c_solver2) .agg(corr=pl.corr(c_cputime_s, c_cputime_s2, method="pearson")) .sort(c_solver, c_solver2) + .select(c_solver, c_solver2, "corr") ) results = ( @@ -78,39 +123,62 @@ def create_output( ).lazy() results = results.join(lf_lookup, how="left", on=replaced).drop(replaced).rename({"pretty": replaced}) - buckets = results.select(c_bucket.unique()) - - df_corr, df_results, df_buckets = pl.collect_all([corr, results, buckets]) + df_corr, df_results, df_buckets, df_answers, df_solvers = pl.collect_all( + [ + corr, + results, + results.select(c_bucket.unique()), + results.select(c_answer.unique()), + results.select(c_solver.unique()), + ] + ) bucket_domain: list[float] = list(df_buckets["bucket"]) + answer_domain: list[str] = list(df_answers["answer"]) + answer_domain.sort(key=lambda x: defs.Answer(x).id) + + solver_domain: list[str] = list(df_solvers["solver"]) + solver_domain.sort(key=lambda x: x.lower()) - row1 = df_corr.row(min(1, len(df_corr) - 1), named=True) + if True: + # Two provers can have no benchmars in common, their pairs is not in df_corrs + corrs: DefaultDict[Tuple[str, str], float] = defaultdict(lambda: 0.0) + for row in df_corr.rows(named=False): + corrs[row[0], row[1]] = row[2] + correlation_sorting(solver_domain, corrs, 1000) # Create heatmap with selection - select_x = alt.selection_point(fields=["solver"], name="solver1", value=row1["solver"], toggle=False) - select_y = alt.selection_point(fields=["solver2"], name="solver2", value=row1["solver2"], toggle=False) - answer_x = alt.selection_point(fields=["answer"], name="answer1") - answer_y = alt.selection_point(fields=["answer2"], name="answer2") + solvers = alt.selection_point( + fields=["solver", "solver2"], + name="solvers", + value=[{"solver": solver_domain[0], "solver2": solver_domain[min(1, len(solver_domain) - 1)]}], + toggle=False, + ) + answer_xy = alt.selection_point(fields=["answer", "answer2"], name="answer") logic = alt.selection_point(fields=["logic"], name="logic") division = alt.selection_point(fields=["division"], name="division") g_select_provers = ( alt.Chart(df_corr, title="Click a tile to compare solvers", height=250, width=250) .mark_rect() .encode( - alt.X("solver", title=None), - alt.Y("solver2", title=None), + alt.X("solver", title="solver1").scale(domain=solver_domain), + alt.Y("solver2", title="solver2").scale(domain=list(reversed(solver_domain))), alt.Color("corr", scale=alt.Scale(domain=[-1, 1], scheme="blueorange")), - opacity=alt.when(select_x, select_y).then(alt.value(1)).otherwise(alt.value(0.4)), + stroke=alt.when(solvers).then(alt.value("lightgreen")), + strokeWidth=alt.value(3), + opacity=alt.value(0.8), ) - .add_params(select_x, select_y) + .add_params(solvers) ) - title = alt.Title(alt.expr(f'{select_x.name}.solver + " vs " + {select_y.name}.solver2')) - g_results = ( + # Number of results by time + title = alt.Title( + alt.expr(f'{solvers.name}.solver + " vs " + {solvers.name}.solver2'), subtitle="comparison of cpu time" + ) + g_results_base = ( alt.Chart(df_results, title=title, height=250, width=250) - .transform_filter(select_x, select_y, answer_x, answer_y) + .transform_filter(solvers, answer_xy, logic, division) .transform_aggregate(benchs="sum(len)", groupby=["bucket", "bucket2"]) - .add_params(select_x, select_y, answer_x, answer_y) .encode( alt.X("bucket:O").axis(title="solver1", bandPosition=0.5) # align should move the center of the cell, it does not @@ -122,42 +190,40 @@ def create_output( ) ) - g_results_rect = g_results.mark_rect().encode(alt.Color("benchs:Q", scale=alt.Scale(scheme="yellowgreenblue"))) - - g_results_text = g_results.mark_text(baseline="middle") - - # g_answers = ( - # alt.Chart(df_results, title="Answers") - # .transform_filter(select_x, select_y, answer_x, answer_y) - # .transform_aggregate(benchs="sum(len)", groupby=["answer_x", "bucket2"]) - # .add_params(select_x, select_y, answer_x, answer_y) - # .encode( - # alt.X("bucket:O").axis(title="solver1", bandPosition=0.5) - # # align should move the center of the cell, it does not - # .scale(type="band", align=0), - # alt.Y("bucket2:O").axis(title="solver2", bandPosition=0.5).scale(type="band", align=0).sort("descending"), - # text="benchs:Q", - # ) - # .mark_bar() - # ) - opacity = ( - alt.when(select_x, select_y, answer_x, answer_y, logic, division).then(alt.value(1)).otherwise(alt.value(0.0)) - ) + g_results_rect = g_results_base.mark_rect().encode(alt.Color("benchs:Q", scale=alt.Scale(scheme="yellowgreenblue"))) - legend_answer_x = ( - alt.Chart(df_results, title=alt.Title(alt.expr(f'"solver1:" + {select_x.name}.solver'))) - .mark_point() - .encode(alt.Y("answer:N").axis(title="", orient="right"), opacity=opacity) - .add_params(answer_x) - ) + g_results_text = g_results_base.mark_text(baseline="middle") - legend_answer_y = ( - alt.Chart(df_results, title=alt.Title(alt.expr(f'"solver2:" + {select_y.name}.solver2'))) - .mark_point() - .encode(alt.Y("answer2:N").axis(title="", orient="right"), opacity=opacity) - .add_params(answer_y) + g_results = g_results_rect + g_results_text + + opacity = alt.when(solvers, answer_xy, logic, division).then(alt.value(1)).otherwise(alt.value(0.01)) + + opacity_answer = alt.when(answer_xy).then(alt.value(1)).otherwise(alt.value(0.5)) + + # Number of results by answers + g_answer_base = ( + alt.Chart(df_results, title="Comparison of the answers", height=250, width=250) + .transform_filter(solvers, logic, division) + .transform_aggregate(benchs="sum(len)", groupby=["answer", "answer2"]) + .add_params(answer_xy) + .encode( + alt.X("answer:N").axis(title="solver1", bandPosition=0.5) + # align should move the center of the cell, it does not + .scale(type="band", align=0, domain=answer_domain), + alt.Y("answer2:N") + .axis(title="solver2", bandPosition=0.5) + .scale(type="band", align=0, domain=list(reversed(answer_domain))), + text="benchs:Q", + opacity=opacity_answer, + ) ) + g_answer_rect = g_answer_base.mark_rect().encode(alt.Color("benchs:Q", scale=alt.Scale(scheme="yellowgreenblue"))) + + g_answer_text = g_answer_base.mark_text(baseline="middle") + + g_answer = g_answer_rect + g_answer_text + legend_logic = ( alt.Chart(df_results, title="Logic") .mark_point() @@ -172,9 +238,9 @@ def create_output( .add_params(division) ) - graph: alt.api.ChartType = (g_select_provers | g_results_rect + g_results_text).resolve_scale(color="independent") + graph: alt.api.ChartType = (g_select_provers | g_results).resolve_scale(color="independent") - graph = alt.vconcat(graph, legend_answer_x | legend_answer_y | legend_logic | legend_division) + graph = alt.vconcat(graph, (g_answer | (legend_logic | legend_division)).resolve_scale(color="independent")) graph = graph.resolve_scale(color="independent")