diff --git a/openjudge/graders/agent/tool/tool_call_accuracy.py b/openjudge/graders/agent/tool/tool_call_accuracy.py
index f97d10e3..a164a421 100644
--- a/openjudge/graders/agent/tool/tool_call_accuracy.py
+++ b/openjudge/graders/agent/tool/tool_call_accuracy.py
@@ -220,6 +220,11 @@ def __init__(
language=language,
)
+ # Pattern to match tool calls in JSON format
+ self._tool_call_pattern = re.compile(
+ r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}', flags=re.DOTALL
+ )
+
def _parse_tools_from_response(
self,
response: str,
@@ -233,10 +238,7 @@ def _parse_tools_from_response(
List of parsed tool calls.
"""
tool_calls = []
-
- # Pattern to match tool calls in JSON format
- tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}'
- matches = re.findall(tool_call_pattern, response, re.DOTALL)
+ matches = self._tool_call_pattern.findall(response)
for match in matches:
try:
diff --git a/openjudge/graders/agent/tool/tool_call_success.py b/openjudge/graders/agent/tool/tool_call_success.py
index e4b135d0..4cb350d5 100644
--- a/openjudge/graders/agent/tool/tool_call_success.py
+++ b/openjudge/graders/agent/tool/tool_call_success.py
@@ -6,7 +6,6 @@
"""
import json
-import re
import textwrap
from typing import Any, Dict, List, Optional, Union
@@ -244,34 +243,6 @@ def __init__(
)
self.template = template or DEFAULT_TOOL_CALL_SUCCESS_TEMPLATE
- def _parse_tools_from_response(
- self,
- response: str,
- ) -> List[Dict[str, Any]]:
- """Extract tool calls from the response.
-
- Args:
- response: The response string to extract tool calls from.
-
- Returns:
- List of parsed tool calls.
- """
- tool_calls = []
-
- # Pattern to match tool calls in JSON format
- tool_call_pattern = r'\{\s*"name"\s*:\s*"[^"]*"\s*,\s*"arguments"\s*:\s*\{.*?\}\s*\}'
- matches = re.findall(tool_call_pattern, response, re.DOTALL)
-
- for match in matches:
- try:
- tool_call = json.loads(match)
- tool_calls.append(tool_call)
- except json.JSONDecodeError:
- # Skip invalid JSON
- continue
-
- return tool_calls
-
async def aevaluate(
self,
tool_definitions: Union[Dict[str, Any], List[Dict[str, Any]]],
diff --git a/openjudge/graders/code/code_excution.py b/openjudge/graders/code/code_excution.py
index 3933faa2..9637d029 100644
--- a/openjudge/graders/code/code_excution.py
+++ b/openjudge/graders/code/code_excution.py
@@ -60,6 +60,11 @@ def __init__(
)
self.test_framework_available = False
+ # Fenced code block explicitly tagged as Python
+ self._python_code_pattern = re.compile(r"```python\n(.*?)\n```", flags=re.DOTALL)
+ # Fenced code block with no language tag
+ self._generic_code_pattern = re.compile(r"```\n(.*?)\n```", flags=re.DOTALL)
+
def _extract_code(self, content: str) -> str:
"""
Extract code from content
@@ -71,12 +76,12 @@ def _extract_code(self, content: str) -> str:
Extracted code
"""
# Try to find Python code in various formats
- code_match = re.search(r"```python\n(.*?)\n```", content, re.DOTALL)
+ code_match = self._python_code_pattern.search(content)
if code_match:
return code_match.group(1)
# Try other formats
- code_match = re.search(r"```\n(.*?)\n```", content, re.DOTALL)
+ code_match = self._generic_code_pattern.search(content)
if code_match:
return code_match.group(1)
diff --git a/openjudge/graders/code/code_style.py b/openjudge/graders/code/code_style.py
index 2deab412..c72b67a0 100644
--- a/openjudge/graders/code/code_style.py
+++ b/openjudge/graders/code/code_style.py
@@ -27,6 +27,11 @@ def __init__(self):
description="Basic code style checking including indentation consistency and naming conventions.",
)
+ self._function_pattern = re.compile(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(")
+ self._variable_pattern = re.compile(r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
+ self._snake_case_pattern = re.compile(r"^[a-z_][a-z0-9_]*$")
+ self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL)
+
def _check_indentation(self, code: str) -> tuple[bool, str]:
"""Check indentation consistency"""
lines = code.split("\n")
@@ -58,11 +63,8 @@ def _check_indentation(self, code: str) -> tuple[bool, str]:
def _check_naming(self, code: str) -> tuple[float, str]:
"""Check naming conventions"""
# Simple naming check
- function_pattern = r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\("
- variable_pattern = r"([a-zA-Z_][a-zA-Z0-9_]*)\s*="
-
- functions = re.findall(function_pattern, code)
- variables = re.findall(variable_pattern, code)
+ functions = self._function_pattern.findall(code)
+ variables = self._variable_pattern.findall(code)
total_names = len(functions) + len(variables)
if total_names == 0:
@@ -72,12 +74,12 @@ def _check_naming(self, code: str) -> tuple[float, str]:
# Check function names (should be snake_case)
for func in functions:
- if re.match(r"^[a-z_][a-z0-9_]*$", func):
+ if self._snake_case_pattern.match(func):
good_names += 1
# Check variable names (should be snake_case)
for var in variables:
- if re.match(r"^[a-z_][a-z0-9_]*$", var):
+ if self._snake_case_pattern.match(var):
good_names += 1
score = good_names / total_names
@@ -122,8 +124,7 @@ async def aevaluate(self, response: str) -> GraderScore:
0.5 Code style score: 0.500; Consistent indentation; Naming convention: 1/2 names follow snake_case
"""
# Extract code blocks
- code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```"
- code_blocks = re.findall(code_pattern, response, re.DOTALL)
+ code_blocks = self._code_pattern.findall(response)
if not code_blocks:
return GraderScore(
diff --git a/openjudge/graders/code/syntax_checker.py b/openjudge/graders/code/syntax_checker.py
index 5544a4de..890a4442 100644
--- a/openjudge/graders/code/syntax_checker.py
+++ b/openjudge/graders/code/syntax_checker.py
@@ -31,6 +31,8 @@ def __init__(self):
description="Check code syntax using Abstract Syntax Tree to validate Python code blocks.",
)
+ self._code_pattern = re.compile(r"```(?:python)?\s*\n(.*?)\n\s*```", re.DOTALL)
+
async def aevaluate(self, response: str) -> GraderScore:
"""Check code syntax in the provided response.
@@ -68,8 +70,7 @@ async def aevaluate(self, response: str) -> GraderScore:
"""
# Extract code blocks
- code_pattern = r"```(?:python)?\s*\n(.*?)\n\s*```"
- code_blocks = re.findall(code_pattern, response, re.DOTALL)
+ code_blocks = self._code_pattern.findall(response)
if not code_blocks:
# No code blocks, return neutral score
diff --git a/openjudge/graders/format/ngram_repetition_penalty.py b/openjudge/graders/format/ngram_repetition_penalty.py
index e43719a0..371f8fa5 100644
--- a/openjudge/graders/format/ngram_repetition_penalty.py
+++ b/openjudge/graders/format/ngram_repetition_penalty.py
@@ -67,10 +67,11 @@ def __init__(
chinese_only=chinese_only,
)
+ self._think_pattern = re.compile(r"(.*?)", flags=re.DOTALL)
+
def _extract_thought_process(self, content: str) -> str:
"""Extract thought process"""
- think_pattern = r"(.*?)"
- matches = re.findall(think_pattern, content, re.DOTALL)
+ matches = self._think_pattern.findall(content)
return " ".join(matches) if matches else ""
def _generate_ngrams(self, tokens: List[str]) -> List[tuple]:
diff --git a/openjudge/graders/format/reasoning_format.py b/openjudge/graders/format/reasoning_format.py
index f2c95235..d0ec45e7 100644
--- a/openjudge/graders/format/reasoning_format.py
+++ b/openjudge/graders/format/reasoning_format.py
@@ -34,7 +34,10 @@ def __init__(self, think_token: str = "think", answer_token: str = "answer"):
description="Check format reward for thinking format and answer format with proper tags.",
)
self.think_token = think_token
+ self.think_pattern = re.compile(f"<{self.think_token}>.*?{self.think_token}>", flags=re.DOTALL)
+
self.answer_token = answer_token
+ self.answer_pattern = re.compile(f"<{self.answer_token}>.*?{self.answer_token}>", flags=re.DOTALL)
# pylint: disable=unused-argument
async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderScore:
@@ -73,12 +76,10 @@ async def aevaluate(self, response: str, *args: Any, **kwargs: Any) -> GraderSco
"""
# Check thinking format tags
- think_pattern = f"<{self.think_token}>.*?{self.think_token}>"
- has_think_tag = bool(re.search(think_pattern, response, re.DOTALL))
+ has_think_tag = bool(self.think_pattern.search(response))
# Check answer format tags
- answer_pattern = f"<{self.answer_token}>.*?{self.answer_token}>"
- has_answer_tag = bool(re.search(answer_pattern, response, re.DOTALL))
+ has_answer_tag = bool(self.answer_pattern.search(response))
# Calculate reward
reward = 1.0 if has_think_tag and has_answer_tag else 0.0
diff --git a/openjudge/graders/format/reasoning_tool_format.py b/openjudge/graders/format/reasoning_tool_format.py
index e663ed5c..4bf56ab8 100644
--- a/openjudge/graders/format/reasoning_tool_format.py
+++ b/openjudge/graders/format/reasoning_tool_format.py
@@ -26,6 +26,19 @@ def __init__(self) -> None:
description="Check tool call format including think, answer and tool_call tags with JSON validation.",
)
+ # patterns for identifying tags
+ self._think_pattern = re.compile(r"(.*?)", re.DOTALL)
+ self._answer_pattern = re.compile(r"(.*?)", re.DOTALL)
+ self._tool_call_pattern = re.compile(r"(.*?)", re.DOTALL)
+
+ self._think_answer_pattern = re.compile(r"^\s*.*?\s*.*?\s*$", re.DOTALL)
+ self._think_tool_call_pattern = re.compile(
+ r"^\s*.*?\s*(?:.*?\s*)+$", re.DOTALL
+ )
+
+ self._consecutive_start_tool_call_tag_pattern = re.compile(r"\s*")
+ self._consecutive_end_tool_call_tag_pattern = re.compile(r"\s*")
+
# pylint: disable=too-many-statements
async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
"""
@@ -69,13 +82,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
"""
# Extract tag contents
- think_pattern = r"(.*?)"
- answer_pattern = r"(.*?)"
- tool_call_pattern = r"(.*?)"
-
- think_matches = re.search(think_pattern, response, re.DOTALL)
- answer_matches = re.search(answer_pattern, response, re.DOTALL)
- tool_call_matches = re.findall(tool_call_pattern, response, re.DOTALL)
+ think_matches = self._think_pattern.search(response)
+ answer_matches = self._answer_pattern.search(response)
+ tool_call_matches = self._tool_call_pattern.findall(response)
has_think_tag = think_matches is not None
has_answer_tag = answer_matches is not None
@@ -89,9 +98,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
# Case 1: <think> + <answer>
if has_answer_tag and not has_tool_call_tag:
# Check overall format
- format_pattern = r"^\s*.*?\s*.*?\s*$"
valid_format = bool(
- re.match(format_pattern, response, re.DOTALL),
+ self._think_answer_pattern.match(response),
)
# Check tag occurrence count
@@ -115,9 +123,8 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
# Case 2: <think> + <tool_call>
elif has_tool_call_tag and not has_answer_tag:
# Check overall format
- format_pattern = r"^\s*.*?\s*(?:.*?\s*)+$"
valid_format = bool(
- re.match(format_pattern, response, re.DOTALL),
+ self._think_tool_call_pattern.match(response),
)
# Check tag occurrence count
@@ -133,11 +140,9 @@ async def aevaluate(self, response: str, **kwargs: Any) -> GraderScore:
# Check for consecutive duplicate tags
if valid_format:
- if re.search(
- r"\s*",
+ if self._consecutive_end_tool_call_tag_pattern.search(
response,
- ) or re.search(
- r"\s*",
+ ) or self._consecutive_start_tool_call_tag_pattern.search(
response,
):
valid_format = False
diff --git a/openjudge/graders/text/number_accuracy.py b/openjudge/graders/text/number_accuracy.py
index 2de4c9ed..6cf705ef 100644
--- a/openjudge/graders/text/number_accuracy.py
+++ b/openjudge/graders/text/number_accuracy.py
@@ -52,12 +52,12 @@ def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
**kwargs,
)
self.tolerance = tolerance
+ self._number_pattern = re.compile(r"-?\d+\.?\d*")
def _extract_numbers(self, text: str) -> List[float]:
"""Extract numbers from text"""
# Match integers and floating point numbers
- number_pattern = r"-?\d+\.?\d*"
- numbers = re.findall(number_pattern, text)
+ numbers = self._number_pattern.findall(text)
return [float(n) for n in numbers if n]
async def aevaluate(self, response: str, reference_response: str) -> GraderScore: