Skip to content

Commit 90d44c1

Browse files
Fix math extraction (#503)
* extract matching * better docstring * lazy imports * bump up math * Update src/lighteval/metrics/dynamic_metrics.py Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> * fix pr commnets * Apply suggestions from code review Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> * rename comparisson -> comparison * fix expr numbers extraction with currency or units * add test for correct extraction of failed answer --------- Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
1 parent 59624c8 commit 90d44c1

File tree

3 files changed

+30
-19
lines changed

3 files changed

+30
-19
lines changed

src/lighteval/metrics/utils/extractive_match_utils.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -103,42 +103,42 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
103103
operators_re = "".join(operators)
104104
all_expr_chars = r"[\d\.\s" + operators_re + r"]"
105105
# Expression must contain at least one operator and must start with a digit
106-
expr_re = rf"-?\(?-?\d{all_expr_chars}*[{operators_re}]{all_expr_chars}+\)?"
106+
expr_re = rf"(?P<expr>-?\(?-?\d{all_expr_chars}*[{operators_re}]{all_expr_chars}+\)?)"
107107

108108
# Punctuation regexes
109109
full_stop_re = rf"[{re.escape(translation_literal.full_stop)}\.]"
110110
comma_re = rf"[{re.escape(translation_literal.comma)}\,]"
111111
colon_re = rf"[{re.escape(translation_literal.colon)}\:]"
112112
space_re = rf"(?:\s|{re.escape(translation_literal.sentence_space)})"
113113

114+
currency_units = re.escape("$€£¥₹₽₪₩₫฿₡₢₣₤₥₦₧₨₩₪₫₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿")
114115
expr_prefix_re = rf"(?:^|{space_re}|\=)(?:\*\*)?"
115116
expr_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|{colon_re}|{space_re}|\)|\$|$)"
116-
117-
expr = f"(?P<expr>{expr_re}|{number_re})"
118-
full_expr = rf"(?:{expr_prefix_re}{expr}{expr_suffix_re})"
117+
# Expressions must have both a prefix and a suffix anchor, while plain numbers need no suffix and may be preceded by a currency unit. This ensures
118+
# that we can extract values like $100 or 100m2, while not extracting XDY2K as 2
119+
expr_with_anchors = rf"(?:{expr_prefix_re}{expr_re}{expr_suffix_re})"
120+
number_with_anchors = rf"(?:{expr_prefix_re}[{currency_units}]?{number_re})"
121+
expr_or_number = rf"(?:{expr_with_anchors}|{number_with_anchors})"
119122
regexes: list[tuple[str, int]] = []
120123

121124
# Ideally we would have a translation of this phrase for each language
122125
if language == Language.ENGLISH:
123-
final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{full_expr}\.?\s?I hope"
124-
final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?{full_expr}"
126+
final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{expr_or_number}\.?\s?I hope"
127+
final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?{expr_or_number}"
125128
regexes.append((final_answer_prefixed_re, 0))
126129
regexes.append((final_answer_prefixed_just_is, 50))
127130

128131
answer_prefix_re = rf"(?i:{translation_literal.answer})"
129132

130133
# Match after the last equals sign following the answer word - requires the number pattern
131-
equals_re_colon = rf"{answer_prefix_re}{colon_re}(?:.{{0,100}}=\s*|.{{0,50}}?){full_expr}(?!\s*=)"
132-
equals_re = rf"{answer_prefix_re}(?:.{{0,100}}=\s*|.{{0,50}}?){full_expr}(?!\s*=)"
134+
equals_re_colon = rf"{answer_prefix_re}{colon_re}(?:.{{0,100}}=\s*|.{{0,50}}?){expr_or_number}(?!\s*=)"
135+
equals_re = rf"{answer_prefix_re}(?:.{{0,100}}=\s*|.{{0,50}}?){expr_or_number}(?!\s*=)"
133136
regexes.extend([(equals_re_colon, 100), (equals_re, 200)])
134137

135138
if expr_config.try_extract_without_anchor:
136139
# If everything fails, try to match plain expr/number
137-
regexes.append((f"({expr_prefix_re})(?P<expr>{expr_re})({expr_suffix_re})", 300))
138-
regexes.append((f"({expr_prefix_re})(?P<expr>{number_re})({expr_suffix_re})", 300))
139-
140-
# Worst case just ignore any prefix/suffix, e.g 1$ wouldn't be extracted otherwise
141-
regexes.append((f"((?P<expr>{number_re}))", 350))
140+
regexes.append((expr_with_anchors, 300))
141+
regexes.append((number_with_anchors, 300))
142142

143143
return [(re.compile(pattern), priority) for pattern, priority in regexes]
144144

@@ -299,7 +299,7 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
299299
# First combine the number
300300
groups = match.groupdict()
301301
# The expr group may be missing or None (number-only regexes do not appear to define it), hence the safe lookup
302-
expr = groups["expr"]
302+
expr = groups.get("expr", "")
303303
integer = next((val for name, val in groups.items() if name.startswith("integer") and val), "")
304304
decimal = next((val for name, val in groups.items() if name.startswith("decimal") and val), "")
305305

@@ -321,10 +321,12 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
321321

322322
# Otherwise just return the expression
323323
# Replace new lines with spaces and ^ with ** before parsing
324-
try:
325-
return parse_expr_with_timeout(expr.replace("\n", " ").replace("^", "**")), expr
326-
except: # noqa: E722
327-
return None, expr
324+
if expr:
325+
try:
326+
return parse_expr_with_timeout(expr.replace("\n", " ").replace("^", "**")), expr
327+
except: # noqa: E722
328+
pass
329+
return None, expr
328330

329331

330332
def convert_to_pct(number: Number):

tests/metrics/test_extractive_match.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,18 @@ def test_multilingual_extraction_math_latex_numbers(gold, pred, language, expect
258258
("0.4", ".4", 1),
259259
# Test decimals
260260
("1000.99", "1,000.99", 1),
261+
("1000.99", "1,000.99", 1),
261262
# Test with units like $
262263
("1000.99", "$1,000.99", 1),
263264
("1000.99", "1,000.99$", 1),
265+
# Test with currency units
266+
("1000.99", "the number is not 10 which is 1,000.99€", 1),
267+
("1000.99", "the number is not 10 which is 1,000.99€", 1),
268+
# Test m2
269+
("1000.99", "so the number is 10 which is 1,000.99m²", 1),
270+
("1000.99", "not it's not 10 it's 1,000.99m²", 1),
271+
# Test correct extraction of an incorrect answer
272+
("2", "AZYUK2A", 0),
264273
],
265274
)
266275
def test_number_extraction(gold, pred, expected):

tests/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222

23+
from pathlib import Path
2324
from types import ModuleType
2425
from typing import Optional, Union
2526
from unittest.mock import patch
2627

27-
from anyio import Path
2828
from transformers import AutoTokenizer
2929

3030
from lighteval.logging.evaluation_tracker import EvaluationTracker

0 commit comments

Comments
 (0)