1
1
import re
2
+ from contextlib import contextmanager
2
3
3
4
from loguru import logger as eval_logger
4
5
from PIL import Image
@@ -101,10 +102,10 @@ def xlrs_process_results(doc, results):
101
102
"answer" : doc ["answer" ],
102
103
}
103
104
104
- return {"xlrs_micro_score" : data_dict }
105
+ return {"xlrs_micro_score" : data_dict , "xlrs_macro_score" : data_dict }
105
106
106
107
107
- def xlrs_aggregate_results (results ):
108
+ def xlrs_aggregate_results (results , macro = False ):
108
109
"""
109
110
Args:
110
111
results: a list of values returned by process_results
@@ -141,7 +142,7 @@ def xlrs_aggregate_results(results):
141
142
else :
142
143
metrics [Task ][Subtask ][f"{ Category } " ]["true" ] += cnt
143
144
metrics [Task ][Subtask ][f"{ Category } " ]["false" ] += 1 - cnt
144
-
145
+ macros = []
145
146
sum_all , succ_all = 0 , 0
146
147
for task , tasks_values in metrics .items ():
147
148
eval_logger .info ("*" * 32 + f"{ task } (Task Start)" )
@@ -160,6 +161,7 @@ def xlrs_aggregate_results(results):
160
161
else :
161
162
acc_subtasks = cnt_subtask / sum_subtask
162
163
eval_logger .info ("+" * 16 + "\t Acc " + "{:.4f}" .format (acc_subtasks ) + f"\t { substask } ({ sum_subtask } items)" )
164
+ macros .append (acc_subtasks )
163
165
cnt_task += cnt_subtask
164
166
sum_task += sum_subtask
165
167
@@ -171,4 +173,21 @@ def xlrs_aggregate_results(results):
171
173
sum_all += sum_task
172
174
eval_logger .info ("*" * 32 + "Acc " + "{:.4f}" .format (acc_task ) + f"\t { task } ({ sum_task } items)\n " )
173
175
eval_logger .info ("*" * 32 + "Overall Acc " + "{:.4f}" .format (succ_all / sum_all ))
174
- return succ_all / sum_all
176
+ if macro is True :
177
+ return sum (macros ) / len (macros )
178
+ else :
179
+ return succ_all / sum_all
180
+
181
+
182
@contextmanager
def mute_eval_logger():
    """Context manager that silences ``eval_logger`` for this module.

    On entry, ``eval_logger.disable`` is called with this module's
    ``__name__``; on exit, ``eval_logger.enable`` is called with the same
    name. Nothing is yielded to the ``with`` body.

    NOTE(review): logging is re-enabled unconditionally on exit, so if it
    was already disabled for this module before entry, that state is not
    restored -- confirm this is acceptable for callers.
    """
    module_name = __name__
    eval_logger.disable(module_name)
    try:
        yield
    finally:
        # Runs even when the wrapped body raises, so the logger is never
        # left muted by an exception.
        eval_logger.enable(module_name)
189
+
190
+
191
def xlrs_aggregate_results_macro_score(results):
    """Aggregate ``results`` into the macro score with logging muted.

    Delegates to ``xlrs_aggregate_results(results, macro=True)`` inside
    ``mute_eval_logger()``.

    Args:
        results: a list of values returned by process_results

    Returns:
        Whatever ``xlrs_aggregate_results`` returns for ``macro=True``.
    """
    # Presumably muted so the per-task report is not logged a second time
    # when both scores are aggregated -- verify against the task config.
    with mute_eval_logger():
        macro_score = xlrs_aggregate_results(results, macro=True)
    return macro_score
0 commit comments