@@ -35,7 +35,7 @@ def get_advglue_scores(breakdown=False):
3535 model_scores = {k .removeprefix ("/" ): v * 100 for k , v in scores ["Accuracy" ].items ()}
3636 model_rejections = {k .removeprefix ("/" ): v * 100 for k , v in scores ["RR+NE" ].items ()}
3737 if breakdown :
38- with open (os .path .join (RESULT_DIR , "adv-glue-plus-plus" , "summary .json" )) as src :
38+ with open (os .path .join (RESULT_DIR , "adv-glue-plus-plus" , "breakdown .json" )) as src :
3939 breakdown_scores = json .load (src )
4040 return breakdown_scores
4141 else :
@@ -72,13 +72,36 @@ def get_ethics_scores(breakdown=False):
7272 df = pd .read_json (os .path .join (RESULT_DIR , "machine_ethics" , "generations" , "scores.jsonl" ), lines = True )
7373 if breakdown :
7474 keys = ["avg_fpr_ev" , "avg_fpr_jb" , "acc_few" , "acc_zero" ]
75- df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ().set_index ("model" )[keys ]
76- df .rename ({
75+ df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ()
76+ df = df [["model" ] + keys ]
77+ df = df .rename ({
7778 "acc_few" : "few-shot benchmark" ,
78- "acc_zero" : "few -shot benchmark" ,
79+ "acc_zero" : "zero -shot benchmark" ,
7980 "avg_fpr_jb" : "jailbreak" ,
8081 "avg_fpr_ev" : "evasive"
81- })
82+ }, axis = 1 )
83+
84+ model_breakdown = {}
85+ for record in df .to_dict (orient = "records" ):
86+ model_breakdown ["model" ] = {
87+ "few-shot benchmark" : record ["few-shot benchmark" ],
88+ "zero-shot benchmark" : record ["zero-shot benchmark" ],
89+ "jailbreak" : record ["jailbreak" ],
90+ "evasive" : record ["evasive" ]
91+ }
92+ # "jailbreak": {
93+ # "brittleness": 1.0
94+ # },
95+ # "evasive": {
96+ # "brittleness": 1.0
97+ # },
98+ # "zero-shot benchmark": {
99+ # "performance": 0.533902323376007
100+ # },
101+ # "few-shot benchmark": {
102+ # "performance": 0.683262209577999
103+ # }
104+ return model_breakdown
82105 else :
83106 keys = ["agg_score" , "ref_rate" ]
84107 df = df [df ["dataset" ] == "ethics_commonsense_short" ].drop_duplicates ().set_index ("model" )[keys ]
@@ -101,9 +124,9 @@ def get_ood_scores(breakdown=False):
101124 model_rejections [model_name ] = scores .get ("rr" , None )
102125 model_breakdowns [model_name ] = scores
103126 if breakdown :
104- return {"score" : model_scores , "rejection_rate" : model_rejections }
105- else :
106127 return model_breakdowns
128+ else :
129+ return {"score" : model_scores , "rejection_rate" : model_rejections }
107130
108131
109132def get_privacy_scores (breakdown = False ):
0 commit comments