Skip to content

Commit 126f908

Browse files
authored
Update extractive_match_utils.py for words where : is preceded by a space (#831)
* Update extractive_match_utils.py for words where `:` is preceded by a space * fix style
1 parent a455539 commit 126f908

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/lighteval/metrics/utils/extractive_match_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
130130

131131
currency_units = re.escape("$€£¥₹₽₪₩₫฿₡₢₣₤₥₦₧₨₩₪₫₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿")
132132
expr_prefix_re = rf"(?:^|{space_re}|\=)(?:\*\*)?"
133-
expr_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|{colon_re}|{space_re}|\)|\$|$)"
133+
expr_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|\s?{colon_re}|{space_re}|\)|\$|$)"
134134
# Expressions must be prefixed and suffixed, while digits don't need a suffix and can be preceded by currency units; this is to ensure
135135
# that we can extract values like $100 or 100m2, while we don't extract XDY2K as 2
136136
expr_with_anchors = rf"(?:{expr_prefix_re}{expr_re}{expr_suffix_re})"
@@ -148,7 +148,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
148148
answer_prefix_re = rf"(?i:{translation_literal.answer})"
149149

150150
# Match after the last equals with answer word - require the number pattern,
151-
equals_re_colon = rf"{answer_prefix_re}{colon_re}(?:.{{0,100}}=\s*|.{{0,50}}?){expr_or_number}(?!\s*=)"
151+
equals_re_colon = rf"{answer_prefix_re}\s?{colon_re}(?:.{{0,100}}=\s*|.{{0,50}}?){expr_or_number}(?!\s*=)"
152152
equals_re = rf"{answer_prefix_re}(?:.{{0,100}}=\s*|.{{0,50}}?){expr_or_number}(?!\s*=)"
153153
regexes.extend([(equals_re_colon, 100), (equals_re, 200)])
154154

@@ -253,7 +253,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
253253
regexes.append((final_answer_prefixed_just_is, 50))
254254

255255
# Match with answer word - higher priority than plain latex
256-
answer_re_colon = f"{answer_prefix_re}{colon_re}.{{0,50}}?{latex_re}"
256+
answer_re_colon = rf"{answer_prefix_re}\s?{colon_re}.{{0,50}}?{latex_re}"
257257
answer_re = f"{answer_prefix_re}.{{0,50}}?{latex_re}"
258258

259259
regexes.extend([(answer_re_colon, 100), (answer_re, 200)])
@@ -299,7 +299,7 @@ def lazy_indices_regex(
299299
space_re = re.escape(translation_literal.sentence_space)
300300

301301
answer_prefix_re = rf"(?:^|{space_re})(?:\*\*)?"
302-
answer_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|{colon_re}|{space_re}|$)"
302+
answer_suffix_re = rf"(?:\*\*)?(?:{full_stop_re}|{comma_re}|\s?{colon_re}|{space_re}|$)"
303303
answer_re = f"{answer_prefix_re}{indice_str_re}{answer_suffix_re}"
304304
answer_re_start = rf"^(?:\*\*)?{indice_str_re}{answer_suffix_re}"
305305
answer_re_line_start = rf"\n(?:\*\*)?{indice_str_re}{answer_suffix_re}"
@@ -322,7 +322,7 @@ def lazy_indices_regex(
322322
regexes.extend(
323323
[
324324
# Most specific patterns first
325-
(f"{answer_word}{colon_re}.{{0,50}}?{answer_re}", 100),
325+
(rf"{answer_word}\s?{colon_re}.{{0,50}}?{answer_re}", 100),
326326
# Answer word patterns
327327
(f"{answer_word}.{{0,50}}?{answer_re}", 150),
328328
# Start of the string

0 commit comments

Comments
 (0)