@@ -624,16 +624,16 @@ class Metrics(Enum):
624624 sample_level_fn = GPassAtK (k = 16 , n = 48 , strip_strings = True ).compute ,
625625 category = MetricCategory .GENERATIVE_SAMPLING ,
626626 use_case = MetricUseCase .REASONING ,
627- corpus_level_fn = { metric : np . mean for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
628- higher_is_better = { metric : True for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
627+ corpus_level_fn = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , np . mean ) ,
628+ higher_is_better = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , True ) ,
629629 )
630630 g_pass_at_8_16 = SampleLevelMetricGrouping (
631631 metric_name = "G-Pass@8-16:48_samples" ,
632632 sample_level_fn = GPassAtK (k = [8 , 16 ], n = 48 , strip_strings = True ).compute ,
633633 category = MetricCategory .GENERATIVE_SAMPLING ,
634634 use_case = MetricUseCase .REASONING ,
635- corpus_level_fn = { metric : np . mean for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
636- higher_is_better = { metric : True for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
635+ corpus_level_fn = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , np . mean ) ,
636+ higher_is_better = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , True ) ,
637637 )
638638 g_pass_at_16_expr_gold = SampleLevelMetricGrouping (
639639 metric_name = "G-Pass@16:48_samples" ,
@@ -653,8 +653,8 @@ class Metrics(Enum):
653653 ).compute ,
654654 category = MetricCategory .GENERATIVE_SAMPLING ,
655655 use_case = MetricUseCase .REASONING ,
656- corpus_level_fn = { metric : np . mean for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
657- higher_is_better = { metric : True for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
656+ corpus_level_fn = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , np . mean ) ,
657+ higher_is_better = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , True ) ,
658658 )
659659 g_pass_at_16_latex_gold = SampleLevelMetricGrouping (
660660 metric_name = "G-Pass@16:48_samples" ,
@@ -674,8 +674,8 @@ class Metrics(Enum):
674674 ).compute ,
675675 category = MetricCategory .GENERATIVE_SAMPLING ,
676676 use_case = MetricUseCase .REASONING ,
677- corpus_level_fn = { metric : np . mean for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
678- higher_is_better = { metric : True for metric in GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics } ,
677+ corpus_level_fn = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , np . mean ) ,
678+ higher_is_better = dict . fromkeys ( GPassAtK (k = 16 , n = 48 , strip_strings = True ).all_metrics , True ) ,
679679 )
680680 perfect_exact_match = SampleLevelMetric (
681681 metric_name = "perfect_em" ,
0 commit comments