# Source: BlackHole/Plot_Redundency.py at main · MehrdadJalali-AI/BlackHole · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# ==========================================
# Redundancy Impact Plots (mean only, no shadows)
# - Reads: Redundency_aggregated_results.csv
# - Plots: Accuracy vs. Redundancy Level (per model)
# - Aggregation: Mean across runs
# - Style: fixed per-method color, line style, and marker (see Config)
# ==========================================

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ------------------------------
# Config
# ------------------------------
# Input table and the folder that receives the rendered figures.
INPUT_CSV = "Redundency_aggregated_results.csv"
OUTPUT_DIR = "plots_redundancy_mean"

# One row per sampling method: (name, line color, line style, marker).
# Row order is the order in which methods are drawn and listed in the legend.
_METHOD_STYLE_TABLE = (
    ("blackhole", "#2E2E2E", "-", "o"),
    ("stratified", "#2CA02C", "--", "^"),
    ("pagerank", "#D62728", "-.", "s"),
    ("kcenter", "#1F77B4", ":", "D"),
)

# Per-method lookups derived from the table above.
METHOD_ORDER = [name for name, _color, _style, _marker in _METHOD_STYLE_TABLE]
METHOD_COLORS = {name: color for name, color, _style, _marker in _METHOD_STYLE_TABLE}
METHOD_STYLES = {name: style for name, _color, style, _marker in _METHOD_STYLE_TABLE}
METHOD_MARKERS = {name: marker for name, _color, _style, marker in _METHOD_STYLE_TABLE}

# Global seaborn theme applied to every figure produced by this script.
sns.set(style="whitegrid", context="talk", palette="deep")

# ------------------------------
# Load data
# ------------------------------
df = pd.read_csv(INPUT_CSV)

# Fail early, listing what was actually found, if the CSV lacks a column
# that the aggregation and plotting steps read.
needed = ["Redundancy_Level", "Method", "Model", "Accuracy_Mean"]
missing = [c for c in needed if c not in df.columns]
if missing:
    raise ValueError(f"Missing columns in CSV: {missing}. Found: {list(df.columns)}")

# Normalize method names: trim surrounding whitespace and lower-case them so
# that values such as " BlackHole" or "PageRank " match the lower-case keys
# of the METHOD_* style tables instead of being left out of the plots.
df["Method"] = df["Method"].str.strip().str.lower()

# ------------------------------
# Aggregate: Mean per (Model, Method, Redundancy_Level)
# ------------------------------
# dropna=False keeps groups whose key contains a missing value rather than
# discarding those rows.
agg = (
    df.groupby(["Model", "Method", "Redundancy_Level"], dropna=False)
      .agg(Accuracy_Mean=("Accuracy_Mean", "mean"))
      .reset_index()
)

# ------------------------------
# Plot function (mean only)
# ------------------------------
os.makedirs(OUTPUT_DIR, exist_ok=True)


def plot_accuracy_vs_redundancy(model_name, subset):
    """Plot mean accuracy against redundancy level for one model and save a PNG.

    One line is drawn per method present in ``subset``. Methods listed in
    ``METHOD_ORDER`` are drawn first, in that order; any other method found in
    the data is appended in sorted order and drawn with the fallback style
    (solid line, circle marker, color left to Matplotlib). When a
    "stratified" row at redundancy level 0 exists, its accuracy is added as a
    dotted gray horizontal baseline.

    Args:
        model_name: Label used in the figure title and in the output file name.
        subset: DataFrame holding the rows of one model, with the columns
            "Method", "Redundancy_Level" and "Accuracy_Mean".

    Returns:
        None. The figure is written to
        ``OUTPUT_DIR/<model_name>_Accuracy_vs_Redundancy.png`` and then closed.
    """
    # Known methods keep their configured order; methods missing from
    # METHOD_ORDER are still plotted instead of being silently dropped.
    present = set(subset["Method"].dropna().unique())
    known = [m for m in METHOD_ORDER if m in present]
    extras = sorted((m for m in present if m not in METHOD_ORDER), key=str)
    methods_here = known + extras

    fig = plt.figure(figsize=(10, 6))
    try:
        for method in methods_here:
            md = subset[subset["Method"] == method].sort_values("Redundancy_Level")

            # Line: mean only
            plt.plot(
                md["Redundancy_Level"], md["Accuracy_Mean"],
                label=str(method).title(),
                color=METHOD_COLORS.get(method),
                linestyle=METHOD_STYLES.get(method, "-"),
                marker=METHOD_MARKERS.get(method, "o"),
                linewidth=2, markersize=8
            )

        # Baseline: Stratified at redundancy 0 (mean)
        base = subset[
            (subset["Method"] == "stratified")
            & (np.isclose(subset["Redundancy_Level"], 0.0))
        ]
        if not base.empty:
            baseline = float(base["Accuracy_Mean"].iloc[0])
            plt.axhline(
                baseline, color="gray", linestyle=":", linewidth=1.5,
                label=f"Baseline (Stratified @ 0.0) = {baseline:.3f}"
            )

        plt.title(f"{model_name}: Accuracy vs. Redundancy Level", fontsize=20, fontweight="bold")
        plt.xlabel("Redundancy Level", fontsize=18)
        plt.ylabel("Accuracy (Mean)", fontsize=18)
        # Missing levels cannot be ordered or used as tick positions.
        plt.xticks(sorted(subset["Redundancy_Level"].dropna().unique()), fontsize=14)
        plt.yticks(fontsize=14)
        plt.grid(True, linestyle="--", alpha=0.4)

        # Only build a legend when something was actually drawn.
        handles, _labels = plt.gca().get_legend_handles_labels()
        if handles:
            plt.legend(fontsize=14)
        plt.tight_layout()

        # Path separators inside a model name would point into a
        # sub-directory that does not exist; replace them.
        safe_name = str(model_name).replace("/", "_").replace("\\", "_")
        fname = os.path.join(OUTPUT_DIR, f"{safe_name}_Accuracy_vs_Redundancy.png")
        plt.savefig(fname, dpi=600)
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

# ------------------------------
# Generate plots per model
# ------------------------------
# One figure per model. groupby walks the models in sorted order, hands over
# each model's rows in a single pass, and skips rows whose Model is missing
# (a missing Model cannot be sorted alongside string names and would only
# yield an empty plot).
for model, model_rows in agg.groupby("Model", sort=True):
    plot_accuracy_vs_redundancy(model, model_rows)

print(f"Saved redundancy mean-only plots to: {OUTPUT_DIR}")