3131from lighteval .utils .language import Language
3232
3333
# Prompt wording for each supported answer type, keyed by the type name that
# appears in the dataset's answer-type label.
chinese_answer_type_dict = {
    "Numerical": "数值",
    "Expression": "表达式",
    "Equation": "方程",
    "Interval": "区间",
}
english_answer_type_dict = {
    "Numerical": "a numerical value",
    "Expression": "an expression",
    "Equation": "an equation",
    "Interval": "an interval",
}
41+
42+
def get_single_answer_type_text(answer_type, is_chinese):
    """Return the prompt wording for a single answer-type label.

    Args:
        answer_type: Raw answer-type label. Everything from the first "-"
            onwards is discarded before matching.
        is_chinese: When true the wording comes from
            ``chinese_answer_type_dict``, otherwise from
            ``english_answer_type_dict``.

    Returns:
        The wording for the first of "Numerical", "Expression", "Equation",
        "Interval" (checked in that order) that occurs as a substring of the
        label.

    Raises:
        ValueError: If none of the known type names occurs in the label.
    """
    # str.partition leaves the label untouched when it contains no "-".
    base_type = answer_type.partition("-")[0]
    for known_type in ("Numerical", "Expression", "Equation", "Interval"):
        if known_type in base_type:
            wording = chinese_answer_type_dict if is_chinese else english_answer_type_dict
            return wording[known_type]
    # Raise instead of calling exit(): a malformed label should surface as an
    # ordinary error the caller can handle, not terminate the interpreter.
    raise ValueError(f"Error parsing answer type {base_type}!")
53+
54+
def get_answer_type_text(answer_type, is_chinese, multiple_answer):
    """Build the prompt fragment that states the expected answer type(s).

    Args:
        answer_type: Answer-type label; for multi-answer problems it may hold
            several comma-separated labels.
        is_chinese: Selects the Chinese wording when true, English otherwise.
        multiple_answer: Whether the problem has more than one answer.

    Returns:
        The fragment to splice into the instruction, or an empty string when
        the label contains "Need_human_evaluate" or "Tuple".
    """
    # 'Tuple' has various meanings in different contexts (a position, the
    # values of a series of variables, ...), so naming it in the prompt could
    # confuse; such labels get no answer-type hint at all.
    if any(marker in answer_type for marker in ("Need_human_evaluate", "Tuple")):
        return ""

    if not multiple_answer:
        wording = get_single_answer_type_text(answer_type, is_chinese)
        if is_chinese:
            return f",答案类型为{wording}"
        return f"The answer of The problem should be {wording}. "

    # A label without a comma splits into a single element, so it takes the
    # "all answers share one type" path below.
    wordings = [get_single_answer_type_text(part, is_chinese) for part in answer_type.split(",")]

    if len(set(wordings)) == 1:
        shared = wordings[0]
        if is_chinese:
            return f",题目有多个答案,答案类型均为{shared}"
        return f"The problem has multiple answers, each of them should be {shared}. "

    if is_chinese:
        listed = "、".join(wordings)
        return f",题目有多个答案,答案类型分别为{listed}"
    listed = ", ".join(wordings)
    return f"The problem has multiple answers, with the answers in order being {listed}. "
93+
94+
# Builds the OlympiadBench prompt: an answer-format instruction followed by the question; the row's final_answer is the gold choice
def olympiad_bench_prompt(line, task_name: str = None):
    """Build the evaluation ``Doc`` for one OlympiadBench row.

    Args:
        line: Dataset row. The keys read here are "subject", "unit",
            "is_multiple_answer", "question" and "final_answer", plus
            "answer_type" for non-theorem-proving tasks.
        task_name: Name of the task being built. A name containing "TP"
            selects the theorem-proving instruction; ``None`` is treated as
            a non-theorem-proving task.

    Returns:
        A ``Doc`` whose query is the instruction, a newline, then the
        question, and whose single choice (gold index 0) is the row's
        "final_answer".
    """
    subject = "Math" if "Math" in line["subject"] else "Physics"

    # task_name defaults to None, and `"TP" in None` raises TypeError, so
    # guard the membership test.
    is_theorem_proving = task_name is not None and "TP" in task_name
    unit = line["unit"]
    is_multiple_answer = line["is_multiple_answer"]

    if is_theorem_proving:
        instruction = f"The following is a theorem proving problem from an International {subject} competition. Please use logical reasoning and common theorems to prove the proposition in the problem according to the given requirements. Please use LaTeX format to represent the variables and formulas used in the proof."
    else:
        if is_multiple_answer:
            multiple_answer_text = "\\boxed{multiple answers connected with commas}"
        else:
            multiple_answer_text = "\\boxed{answer}"
        unit_text = ""
        if unit:
            # The unit is requested after the box, never inside it.
            multiple_answer_text += "(unit)"
            unit_text = ", note that the unit of the answer should not be included in \\boxed{}"

        answer_type_text = get_answer_type_text(
            line["answer_type"], is_chinese=False, multiple_answer=is_multiple_answer
        )

        instruction = f'The following is an open-ended problem from an International {subject} competition. {answer_type_text}Please calculate the answer according to the given requirements and the information provided. Please use LaTeX format to represent the variables and formulas used in the solution process and results. Please end your solution with "So the final answer is {multiple_answer_text}." and give the result explicitly{unit_text}.'

    choice = line["final_answer"]

    return Doc(
        task_name=task_name,
        query=instruction + "\n" + line["question"],
        choices=[choice],
        gold_index=0,
        instruction=instruction,
        specific={},
    )
44134
@@ -54,7 +144,7 @@ def olympiad_bench_prompt(line, task_name: str = None):
54144# * COMP: Competition problems
55145# * CEE: Chinese College Entrance Exam problems
56146
57- question_type = ["OE" , "TP" ]
147+ question_type = ["OE" ] # "TP"]
58148multimodality = ["TO" ] # MM
59149subject = ["physics" , "maths" ]
60150language = ["en" ] # "zh"]
@@ -69,6 +159,29 @@ def olympiad_bench_prompt(line, task_name: str = None):
69159 for src in source :
70160 olympiad_bench_subsets .append (f"{ qt } _{ mm } _{ sub } _{ lang } _{ src } " )
71161
# Subset names accepted by this file; generated names outside this list are
# dropped below.
# NOTE(review): presumably these are the configurations published in the
# dataset repository -- confirm against the hub.
available_subsets = [
    "OE_MM_maths_en_COMP",
    "OE_MM_maths_zh_CEE",
    "OE_MM_maths_zh_COMP",
    "OE_MM_physics_en_COMP",
    "OE_MM_physics_zh_CEE",
    "OE_TO_maths_en_COMP",
    "OE_TO_maths_zh_CEE",
    "OE_TO_maths_zh_COMP",
    "OE_TO_physics_en_COMP",
    "OE_TO_physics_zh_CEE",
    "TP_MM_maths_en_COMP",
    "TP_MM_maths_zh_CEE",
    "TP_MM_maths_zh_COMP",
    "TP_MM_physics_en_COMP",
    "TP_TO_maths_en_COMP",
    "TP_TO_maths_zh_CEE",
    "TP_TO_maths_zh_COMP",
    "TP_TO_physics_en_COMP",
]

# Keep only the generated names that are listed above. sorted() turns the
# intersection into a list with a stable order: iteration order of a set of
# strings can differ between interpreter runs, which would make the order in
# which tasks are registered non-reproducible.
olympiad_bench_subsets = sorted(set(olympiad_bench_subsets).intersection(available_subsets))
184+
72185extraction_targets = [ExprExtractionConfig (), LatexExtractionConfig ()]
73186
74187metric = multilingual_extractive_match_metric (
@@ -77,23 +190,28 @@ def olympiad_bench_prompt(line, task_name: str = None):
77190 pred_extraction_target = extraction_targets ,
78191 precision = 6 ,
79192)
80- # We create the task config
81- olympiad_bench = LightevalTaskConfig (
82- name = "olympiad_bench" ,
83- prompt_function = olympiad_bench_prompt ,
84- suite = ["extended" ],
85- hf_repo = "Hothan/OlympiadBench" ,
86- hf_subset = olympiad_bench_subsets [0 ],
87- metric = [metric ],
88- hf_avail_splits = ["train" ],
89- evaluation_splits = ["train" ],
90- few_shots_split = "train" ,
91- few_shots_select = "random_sampling" ,
92- generation_size = 2048 ,
93- stop_sequence = [], # no stop sequence, will use eot token
94- version = "1.0" ,
95- )
96193
97- # print(olympiad_bench)
# One task config per retained OlympiadBench subset; every config shares the
# same prompt function, metric and generation settings and differs only in
# its name and hf_subset.
task_configs = [
    LightevalTaskConfig(
        name=f"olympiad_bench:{subset}",
        prompt_function=olympiad_bench_prompt,
        suite=["extended"],
        hf_repo="Hothan/OlympiadBench",
        hf_subset=subset,
        metric=[metric],
        hf_avail_splits=["train"],
        evaluation_splits=["train"],
        few_shots_split="train",
        few_shots_select="random_sampling",
        generation_size=2048,
        stop_sequence=[],  # no stop sequence, will use eot token
        version=1,
    )
    for subset in olympiad_bench_subsets
]


TASKS_TABLE = task_configs
0 commit comments