@@ -103,42 +103,42 @@ def lazy_expr_regex(expr_config: ExprExtractionConfig, language: Language) -> li
103103 operators_re = "" .join (operators )
104104 all_expr_chars = r"[\d\.\s" + operators_re + r"]"
105105 # An expression must contain at least one operator and must start with a digit
106- expr_re = rf"-?\(?-?\d{ all_expr_chars } *[{ operators_re } ]{ all_expr_chars } +\)?"
106+ expr_re = rf"(?P<expr> -?\(?-?\d{ all_expr_chars } *[{ operators_re } ]{ all_expr_chars } +\)?) "
107107
108108 # Punctuation regexes
109109 full_stop_re = rf"[{ re .escape (translation_literal .full_stop )} \.]"
110110 comma_re = rf"[{ re .escape (translation_literal .comma )} \,]"
111111 colon_re = rf"[{ re .escape (translation_literal .colon )} \:]"
112112 space_re = rf"(?:\s|{ re .escape (translation_literal .sentence_space )} )"
113113
114+ currency_units = re .escape ("$€£¥₹₽₪₩₫฿₡₢₣₤₥₦₧₨₩₪₫₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿" )
114115 expr_prefix_re = rf"(?:^|{ space_re } |\=)(?:\*\*)?"
115116 expr_suffix_re = rf"(?:\*\*)?(?:{ full_stop_re } |{ comma_re } |{ colon_re } |{ space_re } |\)|\$|$)"
116-
117- expr = f"(?P<expr>{ expr_re } |{ number_re } )"
118- full_expr = rf"(?:{ expr_prefix_re } { expr } { expr_suffix_re } )"
117+ # Expressions must have both a prefix and a suffix, while plain numbers need no suffix and may be preceded by a currency unit. This ensures
118+ # that we can extract things like $100 or 100m2, while we don't extract XDY2K as 2
119+ expr_with_anchors = rf"(?:{ expr_prefix_re } { expr_re } { expr_suffix_re } )"
120+ number_with_anchors = rf"(?:{ expr_prefix_re } [{ currency_units } ]?{ number_re } )"
121+ expr_or_number = rf"(?:{ expr_with_anchors } |{ number_with_anchors } )"
119122 regexes : list [tuple [str , int ]] = []
120123
121124 # Ideally we would have translation of such concept in each language
122125 if language == Language .ENGLISH :
123- final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{ full_expr } \.?\s?I hope"
124- final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?{ full_expr } "
126+ final_answer_prefixed_re = rf"(?i:final answer is)\:?\s*{ expr_or_number } \.?\s?I hope"
127+ final_answer_prefixed_just_is = rf"(?i:final answer.{{0,100}}?)\s+is\:?{ expr_or_number } "
125128 regexes .append ((final_answer_prefixed_re , 0 ))
126129 regexes .append ((final_answer_prefixed_just_is , 50 ))
127130
128131 answer_prefix_re = rf"(?i:{ translation_literal .answer } )"
129132
130133 # Match after the last equals with answer word - require the number pattern,
131- equals_re_colon = rf"{ answer_prefix_re } { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ full_expr } (?!\s*=)"
132- equals_re = rf"{ answer_prefix_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ full_expr } (?!\s*=)"
134+ equals_re_colon = rf"{ answer_prefix_re } { colon_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
135+ equals_re = rf"{ answer_prefix_re } (?:.{{0,100}}=\s*|.{{0,50}}?){ expr_or_number } (?!\s*=)"
133136 regexes .extend ([(equals_re_colon , 100 ), (equals_re , 200 )])
134137
135138 if expr_config .try_extract_without_anchor :
136139 # If everything fails, try to match plain expr/number
137- regexes .append ((f"({ expr_prefix_re } )(?P<expr>{ expr_re } )({ expr_suffix_re } )" , 300 ))
138- regexes .append ((f"({ expr_prefix_re } )(?P<expr>{ number_re } )({ expr_suffix_re } )" , 300 ))
139-
140- # Worst case just ignore any prefix/suffix, e.g 1$ wouldn't be extracted otherwise
141- regexes .append ((f"((?P<expr>{ number_re } ))" , 350 ))
140+ regexes .append ((expr_with_anchors , 300 ))
141+ regexes .append ((number_with_anchors , 300 ))
142142
143143 return [(re .compile (pattern ), priority ) for pattern , priority in regexes ]
144144
@@ -299,7 +299,7 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
299299 # First combine the number
300300 groups = match .groupdict ()
301301 # The expr group may be missing: number-only regexes do not define it, so fall back to an empty string
302- expr = groups [ "expr" ]
302+ expr = groups . get ( "expr" , "" )
303303 integer = next ((val for name , val in groups .items () if name .startswith ("integer" ) and val ), "" )
304304 decimal = next ((val for name , val in groups .items () if name .startswith ("decimal" ) and val ), "" )
305305
@@ -321,10 +321,12 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]:
321321
322322 # Otherwise just return the expression
323323 # Remove new lines and spaces
324- try :
325- return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" )), expr
326- except : # noqa: E722
327- return None , expr
324+ if expr :
325+ try :
326+ return parse_expr_with_timeout (expr .replace ("\n " , " " ).replace ("^" , "**" )), expr
327+ except : # noqa: E722
328+ pass
329+ return None , expr
328330
329331
330332def convert_to_pct (number : Number ):
0 commit comments