-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathper_feature_agreement.py
More file actions
110 lines (85 loc) · 3.02 KB
/
per_feature_agreement.py
File metadata and controls
110 lines (85 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Generate the per-feature agreement figure used in the manuscript.
This is a cleaned-up version of the relevant cells from irr_kaileigh.ipynb.
Running this script saves `per_feature_agreement_kappa_vs_percent.png`
next to the script.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional, Union
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Directory holding this script; inputs and outputs are resolved against it
# so the script behaves the same regardless of the current working directory.
BASE_DIR = Path(__file__).resolve().parent
# Folder expected to contain the pre-computed summary CSV.
DATA_DIR = BASE_DIR.joinpath('data')
# The figure is written next to the script.
OUTPUT_PATH = BASE_DIR.joinpath('per_feature_agreement_kappa_vs_percent.png')
def load_feature_summary(csv_path: Optional[Union[str, Path]] = None) -> pd.DataFrame:
    """Load the pre-computed per-feature IRR summary table.

    Args:
        csv_path: Location of the summary CSV. When ``None``, the file
            ``feature_irr_results.csv`` inside ``DATA_DIR`` is read.

    Returns:
        The table as a DataFrame whose ``FeatureID`` values are strings with
        surrounding whitespace removed.

    Raises:
        KeyError: If the CSV has no ``FeatureID`` column.
    """
    if csv_path is None:
        csv_path = DATA_DIR / 'feature_irr_results.csv'
    # Read FeatureID as text: purely numeric IDs would otherwise be inferred
    # as integers, which breaks the ``.str`` accessor below and silently
    # drops leading zeros (e.g. "007" -> 7).
    df = pd.read_csv(csv_path, dtype={'FeatureID': str})
    df['FeatureID'] = df['FeatureID'].str.strip()
    return df
def _configure_matplotlib() -> None:
    """Apply the figure's global matplotlib style settings.

    Sets the font family, axes edge/face colors, tick colors and a bold axes
    title weight on the process-wide rcParams.
    """
    plt.rc('font', family='DejaVu Sans')
    plt.rc('axes', edgecolor='black', facecolor='white', titleweight='bold')
    # Both tick axes share the same color.
    for tick_group in ('xtick', 'ytick'):
        plt.rc(tick_group, color='black')
def plot_per_feature_agreement(df: pd.DataFrame, output_path: Path = OUTPUT_PATH) -> None:
    """Reproduce the κ vs. % agreement plot and save it to ``output_path``.

    Each row of ``df`` gets one y-axis tick labelled with its ``FeatureID``.
    Two markers with horizontal error bars are drawn per row: mean κ (black
    circle, offset by -0.12 on the y-axis) and mean agreement (open red
    square, offset by +0.12).

    Args:
        df: Summary table with the columns ``FeatureID``, ``Mean Kappa``,
            ``SD Kappa``, ``Mean %Agree`` and ``SD %Agree``.
        output_path: Destination of the saved figure.

    Raises:
        KeyError: If ``df`` lacks any of the required columns.
    """
    # Fail early with a readable message, before any figure is allocated.
    required = ('FeatureID', 'Mean Kappa', 'SD Kappa', 'Mean %Agree', 'SD %Agree')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Summary table is missing required column(s): {missing}")

    _configure_matplotlib()
    fig, ax_k = plt.subplots(figsize=(7, 5))
    try:
        ypos = np.arange(len(df))
        ax_k.set_yticks(ypos)
        ax_k.set_yticklabels(df['FeatureID'])
        fontsize_label = 10
        fontsize_title = 12

        # κ series: filled black circles, nudged below the tick position so
        # the two series do not overlap.
        ax_k.errorbar(
            df['Mean Kappa'],
            ypos - 0.12,
            xerr=df['SD Kappa'],
            fmt='o',
            color='black',
            markersize=7,
            elinewidth=1.8,
            capsize=3,
            label='κ (mean ± SD)',
        )
        # Agreement series: hollow red squares, nudged above the tick position.
        ax_k.errorbar(
            df['Mean %Agree'],
            ypos + 0.12,
            xerr=df['SD %Agree'],
            fmt='s',
            mfc='none',
            mec='#C00000',
            color='#C00000',
            markersize=7,
            elinewidth=1.8,
            capsize=3,
            label='Agreement (mean ± SD)',
        )

        # NOTE(review): the x-axis is fixed to [0, 1]; this assumes agreement
        # is stored as a proportion and that κ is non-negative -- confirm.
        ax_k.set_xlim(0, 1)
        ax_k.set_xlabel('Mean Value (κ and Agreement)', fontsize=fontsize_label)
        ax_k.set_ylabel('Feature ID', fontsize=fontsize_label)
        # Pad the y-range so the first and last rows are not cut off.
        ax_k.set_ylim(-0.6, len(ypos) - 0.4)
        for spine in ax_k.spines.values():
            spine.set_linewidth(1.2)
        ax_k.tick_params(axis='both', labelsize=fontsize_label)
        ax_k.grid(axis='y', color='#b5b5b5', linewidth=0.7)
        ax_k.grid(axis='x', color='#dcdcdc', linewidth=0.7, linestyle='--', alpha=0.6)
        ax_k.set_title('Per-Feature Agreement', fontsize=fontsize_title, pad=18)

        handles, labels = ax_k.get_legend_handles_labels()
        fig.legend(handles, labels, loc='upper left', bbox_to_anchor=(0.02, 0.98),
                   frameon=False, fontsize=fontsize_label)
        # Lay out this figure explicitly instead of relying on pyplot's
        # implicit "current figure".
        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails, so
        # repeated calls do not accumulate open pyplot figures.
        plt.close(fig)
    print(f"Saved figure -> {output_path}")
def main() -> None:
    """Script entry point: load the default summary table and render the figure."""
    plot_per_feature_agreement(load_feature_summary())
# Generate the figure only when executed as a script, not when imported.
if __name__ == '__main__':
    main()