@@ -130,7 +130,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
130
130
131
131
currency_units = re .escape ("$€£¥₹₽₪₩₫฿₡₢₣₤₥₦₧₨₩₪₫₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿" )
132
132
expr_prefix_re = rf"(?:^|{ space_re } |\=)(?:\*\*)?"
133
- expr_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |{ colon_re } |{ space_re } |\)|\$|$)"
133
+ expr_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |\s? { colon_re } |{ space_re } |\)|\$|$)"
134
134
# Expressions must be both prefixed and suffixed, while digits don't need a suffix and may be preceded by currency units; this is to ensure
135
135
# that we can extract values like $100 or 100m2, while we don't extract XDY2K as 2
136
136
expr_with_anchors = rf"(?:{ expr_prefix_re } { expr_re } { expr_suffix_re } )"
@@ -148,7 +148,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
148
148
answer_prefix_re = rf"(?i:{ translation_literal .answer } )"
149
149
150
150
# Match after the last equals with answer word - require the number pattern,
151
- equals_re_colon = rf"{ answer_prefix_re } { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
151
+ equals_re_colon = rf"{ answer_prefix_re } \s? { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
152
152
equals_re = rf"{ answer_prefix_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
153
153
regexes .extend ([(equals_re_colon , 100 ), (equals_re , 200 )])
154
154
@@ -253,7 +253,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
253
253
regexes .append ((final_answer_prefixed_just_is , 50 ))
254
254
255
255
# Match with answer word - higher priority than plain latex
256
- answer_re_colon = f "{ answer_prefix_re } { colon_re } .{{0,50}}?{ latex_re } "
256
+ answer_re_colon = rf "{ answer_prefix_re } \s? { colon_re } .{{0,50}}?{ latex_re } "
257
257
answer_re = f"{ answer_prefix_re } .{{0,50}}?{ latex_re } "
258
258
259
259
regexes .extend ([(answer_re_colon , 100 ), (answer_re , 200 )])
@@ -299,7 +299,7 @@ def lazy_indices_regex(
299
299
space_re = re .escape (translation_literal .sentence_space )
300
300
301
301
answer_prefix_re = rf"(?:^|{ space_re } )(?:\*\*)?"
302
- answer_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |{ colon_re } |{ space_re } |$)"
302
+ answer_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |\s? { colon_re } |{ space_re } |$)"
303
303
answer_re = f"{ answer_prefix_re } { indice_str_re } { answer_suffix_re } "
304
304
answer_re_start = rf"^(?:\*\*)?{ indice_str_re } { answer_suffix_re } "
305
305
answer_re_line_start = rf"\n(?:\*\*)?{ indice_str_re } { answer_suffix_re } "
@@ -322,7 +322,7 @@ def lazy_indices_regex(
322
322
regexes .extend (
323
323
[
324
324
# Most specific patterns first
325
- (f "{ answer_word } { colon_re } .{{0,50}}?{ answer_re } " , 100 ),
325
+ (rf "{ answer_word } \s? { colon_re } .{{0,50}}?{ answer_re } " , 100 ),
326
326
# Answer word patterns
327
327
(f"{ answer_word } .{{0,50}}?{ answer_re } " , 150 ),
328
328
# Start of the string
0 commit comments