@@ -14,12 +14,14 @@
 from .utils import extract_text_from_outputs, validate_inputs
 
 
-def parse_score_from_evaluator_output(evaluator_output: str, valid_score_range: tuple[int, int] | None) -> int | None:
+def parse_score_from_evaluator_output(
+    evaluator_output: str, valid_score_range: tuple[int, int] | None, regex_to_parse_score: str = r"(\d+)"
+) -> int | None:
     """Extract the last integer value from the evaluator output.
 
     Return None if parsing fails.
     """
-    matched = re.findall(r"(\d+)", evaluator_output)
+    matched = re.findall(regex_to_parse_score, evaluator_output)
     if not matched:
         return None
 
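A minimal sketch of how the new `regex_to_parse_score` parameter changes parsing, assuming the last-match behavior shown above; the import path and the example strings are assumptions, not taken from this revision:

```python
# Import path is an assumption; adjust to wherever the function lives in your tree.
from flexeval.metrics.llm_score import parse_score_from_evaluator_output

text = "Helpfulness: [[4]] out of 5."

# Default r"(\d+)": every integer matches, and the last one ("5") wins.
parse_score_from_evaluator_output(text, valid_score_range=(1, 5))
# -> 5

# Custom regex: only the bracketed score matches, so the trailing "5" is ignored.
parse_score_from_evaluator_output(
    text,
    valid_score_range=(1, 5),
    regex_to_parse_score=r"\[\[(\d+)\]\]",
)
# -> 4
```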
@@ -182,6 +184,7 @@ class LLMScore(Metric):
         category_key: A key to create category-wise mean score.
             The category key is expected to be in extra_info.
         metric_prefix: A prefix to be added to the metric keys in the summary and instance details.
+        regex_to_parse_score: A regular expression used to parse the score from the evaluator output.
 
     Examples:
         >>> from flexeval import LLMScore, OpenAIChatAPI, Jinja2PromptTemplate
@@ -216,6 +219,7 @@ def __init__(
         valid_score_range: tuple[int, int] | None = None,
         category_key: str | None = None,
         metric_prefix: str | None = None,
+        regex_to_parse_score: str = r"(\d+)",
     ) -> None:
         self.language_model = language_model
         self.prompt_template = prompt_template
@@ -224,6 +228,7 @@ def __init__(
         self.valid_score_range = valid_score_range
         self.category_key = category_key
         self.metric_prefix = f"{metric_prefix}-" if metric_prefix else ""
+        self.regex_to_parse_score = regex_to_parse_score
 
     def evaluate(
         self,
@@ -254,6 +259,7 @@ def evaluate(
             evaluator_score = parse_score_from_evaluator_output(
                 evaluator_output.text,
                 valid_score_range=self.valid_score_range,
+                regex_to_parse_score=self.regex_to_parse_score,
             )
             if evaluator_score is None:
                 logger.warning(f"Failed to parse score from evaluator output: {evaluator_output}")
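And a hedged sketch of wiring the argument through `LLMScore`, following the imports shown in the class docstring; the `OpenAIChatAPI`/`Jinja2PromptTemplate` constructor arguments, the model name, and the `lm_output` template variable are assumptions:

```python
from flexeval import LLMScore, OpenAIChatAPI, Jinja2PromptTemplate

metric = LLMScore(
    language_model=OpenAIChatAPI("gpt-4o-mini"),  # model name is illustrative
    prompt_template=Jinja2PromptTemplate(
        "Rate the response from 1 to 5 and answer as [[score]].\n{{ lm_output }}"
    ),
    valid_score_range=(1, 5),
    regex_to_parse_score=r"\[\[(\d+)\]\]",  # pick up only the bracketed score
)
```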