Skip to content

Commit 3c9b0c9

Browse files
authored
Bug fix extractive match (#540)
* update extraction match to reflect newest math-verify
* revert symbols, improve sets handling
* rm todo
* fmt + remove empty excepts + bump l2s
* fmt
* docstring
* fixed boxed, bump broken latex2sympy
* allow more separators
1 parent f8405ee commit 3c9b0c9

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ multilingual = [
109109
"jieba", # for chinese tokenizer
110110
"pyvi", # for vietnamese tokenizer
111111
]
112-
math = ["latex2sympy2_extended==1.0.4"]
112+
math = ["latex2sympy2_extended==1.0.6"]
113113

114114
[project.urls]
115115
Homepage = "https://github.com/huggingface/lighteval"

src/lighteval/metrics/utils/extractive_match_utils.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,7 @@ def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"]
215215
rf"(?<!\\)\\\((?P<{prefix}latexInlineParenthesis>{inline_content_parenthesis})(?<!\\)\\\)",
216216
rf"\s\[(?P<{prefix}latexInlineBracket>{inline_content_bracket})\]\s",
217217
]
218-
if context == "boxed":
219-
# allow also matching plain boxed
220-
patterns.append(rf"(?P<{prefix}latexBoxed>\\boxed{{.+}})")
221-
elif context == "plain":
218+
if context == "plain":
222219
simple_number = r"-?\d+(?:[.,]\d+)?"
223220
patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")
224221

@@ -237,7 +234,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
237234
and_word = translation_literal.and_word
238235
or_word = translation_literal.or_word
239236
next_groups = "".join(
240-
[rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_')})?" for i in range(1, 6)]
237+
[rf"(?:\s*(?:{and_word}|{or_word}|,)\s*{make_latex_env_pattern(f'next{i}_')})?" for i in range(1, 6)]
241238
)
242239

243240
latex_envs_re = rf"(?:{first_latex_group}{next_groups})"
@@ -269,7 +266,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
269266
latex_re_boxed = make_latex_env_pattern(prefix="first_", context="boxed")
270267
next_groups = "".join(
271268
[
272-
rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_', context='boxed')})?"
269+
rf"(?:\s*(?:{and_word}|{or_word}|,)\s*{make_latex_env_pattern(f'next{i}_', context='boxed')})?"
273270
for i in range(1, 6)
274271
]
275272
)

tests/metrics/test_extractive_match.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ def test_math_extraction_additional_cases(gold, pred, expected):
10351035
),
10361036
(
10371037
r"$(2,1),(1,2),(-1,-20),(-20,-1)$",
1038-
r"solutions are:\n\n\\[\n\\boxed{(1, 2)}, \\boxed{(2, 1)}, \\boxed{(-1, -20)}, \\boxed{(-20, -1)}\n\\]",
1038+
"solutions are:\n\n\\[\n\\boxed{(1, 2)}, \\boxed{(2, 1)}, \\boxed{(-1, -20)}, \\boxed{(-20, -1)}\n\\]",
10391039
1,
10401040
),
10411041
(
@@ -1121,6 +1121,16 @@ def test_math_extraction_additional_cases(gold, pred, expected):
11211121
r"$\boxed{10^{\frac{\sqrt{13} - 5}{6}}} \quad \text{and} \quad \boxed{10^{-\frac{5 + \sqrt{13}}{6}}}$",
11221122
1,
11231123
),
1124+
(
1125+
r"\boxed{1} and and and or thus but \boxed{2} and \boxed{3}",
1126+
r"$\boxed{2,3}$",
1127+
1,
1128+
),
1129+
(
1130+
r"\boxed{1} and and and or thus but \boxed{2} and \boxed{3}",
1131+
r"$\boxed{1,2,3}$",
1132+
0,
1133+
),
11241134
],
11251135
)
11261136
def test_math_numina_cases(gold, pred, expected):

0 commit comments

Comments
 (0)