@@ -130,7 +130,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
130130
131131 currency_units = re .escape ("$€£¥₹₽₪₩₫฿₡₢₣₤₥₦₧₨₩₪₫₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿" )
132132 expr_prefix_re = rf"(?:^|{ space_re } |\=)(?:\*\*)?"
133- expr_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |{ colon_re } |{ space_re } |\)|\$|$)"
133+ expr_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |\s? { colon_re } |{ space_re } |\)|\$|$)"
134134 # Expressions must be both prefixed and suffixed, while digits don't need a suffix and can be preceded by currency units. This ensures
135135 # that we can extract things like $100 or 100m2, while we don't extract XDY2K as 2
136136 expr_with_anchors = rf"(?:{ expr_prefix_re } { expr_re } { expr_suffix_re } )"
@@ -148,7 +148,7 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
148148 answer_prefix_re = rf"(?i:{ translation_literal .answer } )"
149149
150150 # Match after the last equals with answer word - require the number pattern,
151- equals_re_colon = rf"{ answer_prefix_re } { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
151+ equals_re_colon = rf"{ answer_prefix_re } \s? { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
152152 equals_re = rf"{ answer_prefix_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
153153 regexes .extend ([(equals_re_colon , 100 ), (equals_re , 200 )])
154154
@@ -253,7 +253,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
253253 regexes .append ((final_answer_prefixed_just_is , 50 ))
254254
255255 # Match with answer word - higher priority than plain latex
256- answer_re_colon = f "{ answer_prefix_re } { colon_re } .{{0,50}}?{ latex_re } "
256+ answer_re_colon = rf "{ answer_prefix_re } \s? { colon_re } .{{0,50}}?{ latex_re } "
257257 answer_re = f"{ answer_prefix_re } .{{0,50}}?{ latex_re } "
258258
259259 regexes .extend ([(answer_re_colon , 100 ), (answer_re , 200 )])
@@ -299,7 +299,7 @@ def lazy_indices_regex(
299299 space_re = re .escape (translation_literal .sentence_space )
300300
301301 answer_prefix_re = rf"(?:^|{ space_re } )(?:\*\*)?"
302- answer_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |{ colon_re } |{ space_re } |$)"
302+ answer_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |\s? { colon_re } |{ space_re } |$)"
303303 answer_re = f"{ answer_prefix_re } { indice_str_re } { answer_suffix_re } "
304304 answer_re_start = rf"^(?:\*\*)?{ indice_str_re } { answer_suffix_re } "
305305 answer_re_line_start = rf"\n(?:\*\*)?{ indice_str_re } { answer_suffix_re } "
@@ -322,7 +322,7 @@ def lazy_indices_regex(
322322 regexes .extend (
323323 [
324324 # Most specific patterns first
325- (f "{ answer_word } { colon_re } .{{0,50}}?{ answer_re } " , 100 ),
325+ (rf "{ answer_word } \s? { colon_re } .{{0,50}}?{ answer_re } " , 100 ),
326326 # Answer word patterns
327327 (f"{ answer_word } .{{0,50}}?{ answer_re } " , 150 ),
328328 # Start of the string
0 commit comments