Skip to content

Commit 9b278f5

Browse files
committed
Add AdvGLUE++ score breakdown
1 parent 1b467a7 commit 9b278f5

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

src/dt/perspectives/advglue/adv_stats.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,9 +55,19 @@ def main():
5555
json.dump(refusals, dest, indent=4)
5656
print(json.dumps(refusals, indent=4))
5757
df = pd.DataFrame(df)
58-
print(df)
5958
df.to_csv(os.path.join(RESULT_DIR, "task_breakdown.csv"), index=False)
6059

60+
breakdown = df[["TargetModel", "Task", "Accuracy"]].groupby(["TargetModel", "Task"]).mean()
61+
breakdown_dict = {}
62+
for model in breakdown.reset_index()["TargetModel"].unique():
63+
breakdown_dict[model] = {}
64+
for task in breakdown.reset_index()["Task"].unique():
65+
breakdown_dict[model][task] = {"acc": breakdown.loc[(model, task), "Accuracy"]}
66+
67+
with open(os.path.join(RESULT_DIR, "breakdown.json"), "w") as dest:
68+
print(breakdown)
69+
json.dump(breakdown_dict, dest, indent=4)
70+
6171
# task_weights = df.apply(lambda x: x["TaskDataCount"] / (df[df["Task"] == x["Task"]]["TaskDataCount"].unique(
6272
# ).sum()), axis=1) df["Accuracy"] *= task_weights df["AccuracyNoRefusal"] *= task_weights
6373
df.drop(["BaseModel", "Task"], axis=1, inplace=True)

0 commit comments

Comments
 (0)