From 0bfbac4d7904c86b4a9ecde9f85ba68e702cf875 Mon Sep 17 00:00:00 2001 From: "w.zhang" <136773451+weizhang25@users.noreply.github.com> Date: Fri, 9 Jan 2026 06:18:27 +0800 Subject: [PATCH] Use compiled regex objects instead of raw pattern strings. --- .../graders/agent/tool/tool_call_accuracy.py | 10 +++--- .../graders/agent/tool/tool_call_success.py | 29 --------------- openjudge/graders/code/code_excution.py | 9 +++-- openjudge/graders/code/code_style.py | 19 +++++----- openjudge/graders/code/syntax_checker.py | 5 +-- .../format/ngram_repetition_penalty.py | 5 +-- openjudge/graders/format/reasoning_format.py | 9 ++--- .../graders/format/reasoning_tool_format.py | 35 +++++++++++-------- openjudge/graders/text/number_accuracy.py | 4 +-- 9 files changed, 56 insertions(+), 69 deletions(-) diff --git a/openjudge/graders/agent/tool/tool_call_accuracy.py b/openjudge/graders/agent/tool/tool_call_accuracy.py index f97d10e3..a164a421 100644 --- a/openjudge/graders/agent/tool/tool_call_accuracy.py +++ b/openjudge/graders/agent/tool/tool_call_accuracy.py @@ -220,6 +220,11 @@ def __init__( language=language, ) + # Pattern to match tool calls in JSON format + self._tool_call_pattern = re.compile( + r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}', flags=re.DOTALL + ) + def _parse_tools_from_response( self, response: str, @@ -233,10 +238,7 @@ def _parse_tools_from_response( List of parsed tool calls. 
""" tool_calls = [] - - # Pattern to match tool calls in JSON format - tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}' - matches = re.findall(tool_call_pattern, response, re.DOTALL) + matches = self._tool_call_pattern.findall(response) for match in matches: try: diff --git a/openjudge/graders/agent/tool/tool_call_success.py b/openjudge/graders/agent/tool/tool_call_success.py index e4b135d0..4cb350d5 100644 --- a/openjudge/graders/agent/tool/tool_call_success.py +++ b/openjudge/graders/agent/tool/tool_call_success.py @@ -6,7 +6,6 @@ """ import json -import re import textwrap from typing import Any, Dict, List, Optional, Union @@ -244,34 +243,6 @@ def __init__( ) self.template = template or DEFAULT_TOOL_CALL_SUCCESS_TEMPLATE - def _parse_tools_from_response( - self, - response: str, - ) -> List[Dict[str, Any]]: - """Extract tool calls from the response. - - Args: - response: The response string to extract tool calls from. - - Returns: - List of parsed tool calls. 
- """ - tool_calls = [] - - # Pattern to match tool calls in JSON format - tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}' - matches = re.findall(tool_call_pattern, response, re.DOTALL) - - for match in matches: - try: - tool_call = json.loads(match) - tool_calls.append(tool_call) - except json.JSONDecodeError: - # Skip invalid JSON - continue - - return tool_calls - async def aevaluate( self, tool_definitions: Union[Dict[str, Any], List[Dict[str, Any]]], diff --git a/openjudge/graders/code/code_excution.py b/openjudge/graders/code/code_excution.py index 3933faa2..9637d029 100644 --- a/openjudge/graders/code/code_excution.py +++ b/openjudge/graders/code/code_excution.py @@ -60,6 +60,11 @@ def __init__( ) self.test_framework_available = False + # Python code pattern in various formats + self._python_code_pattern = re.compile(r"```python\n(.*?)\n```", flags=re.DOTALL) + # generic code formats + self._generic_code_pattern = re.compile(r"```\n(.*?)\n```", flags=re.DOTALL) + def _extract_code(self, content: str) -> str: """ Extract code from content @@ -71,12 +76,12 @@ def _extract_code(self, content: str) -> str: Extracted code """ # Try to find Python code in various formats - code_match = re.search(r"```python\n(.*?)\n```", content, re.DOTALL) + code_match = self._python_code_pattern.search(content) if code_match: return code_match.group(1) # Try other formats - code_match = re.search(r"```\n(.*?)\n```", content, re.DOTALL) + code_match = self._generic_code_pattern.search(content) if code_match: return code_match.group(1) diff --git a/openjudge/graders/code/code_style.py b/openjudge/graders/code/code_style.py index 2deab412..c72b67a0 100644 --- a/openjudge/graders/code/code_style.py +++ b/openjudge/graders/code/code_style.py @@ -27,6 +27,11 @@ def __init__(self): description="Basic code style checking including indentation consistency and naming conventions.", ) + self._function_pattern = 
re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(") + self._variable_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=") + self._snake_case_pattern = re.compile(r"^[a-z_][a-z0-9_]*$") + self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL) + def _check_indentation(self, code: str) -> tuple[bool, str]: """Check indentation consistency""" lines = code.split("\n") @@ -58,11 +63,8 @@ def _check_indentation(self, code: str) -> tuple[bool, str]: def _check_naming(self, code: str) -> tuple[float, str]: """Check naming conventions""" # Simple naming check - function_pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(" - variable_pattern = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=" - - functions = re.findall(function_pattern, code) - variables = re.findall(variable_pattern, code) + functions = self._function_pattern.findall(code) + variables = self._variable_pattern.findall(code) total_names = len(functions) + len(variables) if total_names == 0: @@ -72,12 +74,12 @@ def _check_naming(self, code: str) -> tuple[float, str]: # Check function names (should be snake_case) for func in functions: - if re.match(r"^[a-z_][a-z0-9_]*$", func): + if self._snake_case_pattern.match(func): good_names += 1 # Check variable names (should be snake_case) for var in variables: - if re.match(r"^[a-z_][a-z0-9_]*$", var): + if self._snake_case_pattern.match(var): good_names += 1 score = good_names / total_names @@ -122,8 +124,7 @@ async def aevaluate(self, response: str) -> GraderScore: 0.5 Code style score: 0.500; Consistent indentation; Naming convention: 1/2 names follow snake_case """ # Extract code blocks - code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```" - code_blocks = re.findall(code_pattern, response, re.DOTALL) + code_blocks = self._code_pattern.findall(response) if not code_blocks: return GraderScore( diff --git a/openjudge/graders/code/syntax_checker.py b/openjudge/graders/code/syntax_checker.py index 5544a4de..890a4442 100644 --- 
a/openjudge/graders/code/syntax_checker.py +++ b/openjudge/graders/code/syntax_checker.py @@ -31,6 +31,8 @@ def __init__(self): description="Check code syntax using Abstract Syntax Tree to validate Python code blocks.", ) + self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL) + async def aevaluate(self, response: str) -> GraderScore: """Check code syntax in the provided response. @@ -68,8 +70,7 @@ async def aevaluate(self, response: str) -> GraderScore: """ # Extract code blocks - code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```" - code_blocks = re.findall(code_pattern, response, re.DOTALL) + code_blocks = self._code_pattern.findall(response) if not code_blocks: # No code blocks, return neutral score diff --git a/openjudge/graders/format/ngram_repetition_penalty.py b/openjudge/graders/format/ngram_repetition_penalty.py index e43719a0..371f8fa5 100644 --- a/openjudge/graders/format/ngram_repetition_penalty.py +++ b/openjudge/graders/format/ngram_repetition_penalty.py @@ -67,10 +67,11 @@ def __init__( chinese_only=chinese_only, ) + self._think_pattern = re.compile(r"(.*?)", flags=re.DOTALL) + def _extract_thought_process(self, content: str) -> str: """Extract thought process""" - think_pattern = r"(.*?)" - matches = re.findall(think_pattern, content, re.DOTALL) + matches = self._think_pattern.findall(content) return " ".join(matches) if matches else "" def _generate_ngrams(self, tokens: List[str]) -> List[tuple]: diff --git a/openjudge/graders/format/reasoning_format.py b/openjudge/graders/format/reasoning_format.py index f2c95235..d0ec45e7 100644 --- a/openjudge/graders/format/reasoning_format.py +++ b/openjudge/graders/format/reasoning_format.py @@ -34,7 +34,10 @@ def __init__(self, think_token: str = "think", answer_token: str = "answer"): description="Check format reward for thinking format and answer format with proper tags.", ) self.think_token = think_token + self.think_pattern = re.compile(f"<{self.think_token}>.*?", 
flags=re.DOTALL) + + self.answer_token = answer_token + self.answer_pattern = re.compile(f"<{self.answer_token}>.*?", flags=re.DOTALL) # pylint: disable=unused-argument async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderScore: @@ -73,12 +76,10 @@ async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderSco """ # Check thinking format tags - think_pattern = f"<{self.think_token}>.*?" - has_think_tag = bool(re.search(think_pattern, response, re.DOTALL)) + has_think_tag = bool(self.think_pattern.search(response)) # Check answer format tags - answer_pattern = f"<{self.answer_token}>.*?" - has_answer_tag = bool(re.search(answer_pattern, response, re.DOTALL)) + has_answer_tag = bool(self.answer_pattern.search(response)) # Calculate reward reward = 1.0 if has_think_tag and has_answer_tag else 0.0 diff --git a/openjudge/graders/format/reasoning_tool_format.py b/openjudge/graders/format/reasoning_tool_format.py index e663ed5c..4bf56ab8 100644 --- a/openjudge/graders/format/reasoning_tool_format.py +++ b/openjudge/graders/format/reasoning_tool_format.py @@ -26,6 +26,19 @@ def __init__(self) -> None: description="Check tool call format including think, answer and tool_call tags with JSON validation.", ) + # patterns for identifying tags + self._think_pattern = re.compile(r"(.*?)", re.DOTALL) + self._answer_pattern = re.compile(r"(.*?)", re.DOTALL) + self._tool_call_pattern = re.compile(r"(.*?)", re.DOTALL) + + self._think_answer_pattern = re.compile(r"^\s*.*?\s*.*?\s*$", re.DOTALL) + self._think_tool_call_pattern = re.compile( + r"^\s*.*?\s*(?:.*?\s*)+$", re.DOTALL + ) + + self._consecutive_start_tool_call_tag_pattern = re.compile(r"\s*") + self._consecutive_end_tool_call_tag_pattern = re.compile(r"\s*") + # pylint: disable=too-many-statements async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore: """ @@ -69,13 +82,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore: """ # Extract tag 
contents - think_pattern = r"(.*?)" - answer_pattern = r"(.*?)" - tool_call_pattern = r"(.*?)" - - think_matches = re.search(think_pattern, response, re.DOTALL) - answer_matches = re.search(answer_pattern, response, re.DOTALL) - tool_call_matches = re.findall(tool_call_pattern, response, re.DOTALL) + think_matches = self._think_pattern.search(response) + answer_matches = self._answer_pattern.search(response) + tool_call_matches = self._tool_call_pattern.findall(response) has_think_tag = think_matches is not None has_answer_tag = answer_matches is not None @@ -89,9 +98,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore: # Case 1: + if has_answer_tag and not has_tool_call_tag: # Check overall format - format_pattern = r"^\s*.*?\s*.*?\s*$" valid_format = bool( - re.match(format_pattern, response, re.DOTALL), + self._think_answer_pattern.match(response), ) # Check tag occurrence count @@ -115,9 +123,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore: # Case 2: + elif has_tool_call_tag and not has_answer_tag: # Check overall format - format_pattern = r"^\s*.*?\s*(?:.*?\s*)+$" valid_format = bool( - re.match(format_pattern, response, re.DOTALL), + self._think_tool_call_pattern.match(response), ) # Check tag occurrence count @@ -133,11 +140,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore: # Check for consecutive duplicate tags if valid_format: - if re.search( - r"\s*", + if self._consecutive_end_tool_call_tag_pattern.search( response, - ) or re.search( - r"\s*", + ) or self._consecutive_start_tool_call_tag_pattern.search( response, ): valid_format = False diff --git a/openjudge/graders/text/number_accuracy.py b/openjudge/graders/text/number_accuracy.py index 2de4c9ed..6cf705ef 100644 --- a/openjudge/graders/text/number_accuracy.py +++ b/openjudge/graders/text/number_accuracy.py @@ -52,12 +52,12 @@ def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None: **kwargs, ) self.tolerance = 
tolerance + self._number_pattern = re.compile(r"-?\d+\.?\d*") def _extract_numbers(self, text: str) -> List[float]: """Extract numbers from text""" # Match integers and floating point numbers - number_pattern = r"-?\d+\.?\d*" - numbers = re.findall(number_pattern, text) + numbers = self._number_pattern.findall(text) return [float(n) for n in numbers if n] async def aevaluate(self, response: str, reference_response: str) -> GraderScore: