
Commit 1d474f6

refactor(graders,models): cleanup and improve code quality
- Refactor text graders normalization and tokenization utils
- Clean up number_accuracy, similarity, and string_match graders
- Improve math_expression_verify implementation
- Refactor openai_chat_model and qwen_vl_model
- Update zero_shot_evaluation documentation
- Add code cleanup guide documents
1 parent 3fb26aa commit 1d474f6

8 files changed: +108 −115 lines changed

openjudge/graders/math/math_expression_verify.py

Lines changed: 3 additions & 3 deletions
@@ -21,10 +21,10 @@ class MathExpressionVerifyGrader(BaseGrader):
 
     def __init__(self, timeout_score: float = 1.0, **kwargs: Any):
         """
-        Initialize the MathVerifyGrader.
+        Initialize the MathExpressionVerifyGrader.
 
         Args:
-            timeout_score: Score to assign on timeout
+            timeout_score: Score to assign on timeout or exception.
         """
         super().__init__(
             name="math_verify",
@@ -56,7 +56,7 @@ async def aevaluate(self, response: str, reference_response: str) -> GraderScore
 
         Examples:
             >>> import asyncio
-            >>> grader = MathVerifyGrader()
+            >>> grader = MathExpressionVerifyGrader()
             >>> result = asyncio.run(grader.aevaluate("2+2", "4"))
             >>> print(result.score)
             1.0
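
A quick usage sketch of the renamed grader, assuming the import path mirrors the file location above (the diff itself does not show the package exports):

# Sketch only: import path inferred from the file path in this commit,
# not confirmed by the diff.
import asyncio

from openjudge.graders.math.math_expression_verify import MathExpressionVerifyGrader

async def main() -> None:
    # timeout_score is the score assigned on timeout or exception (per the new docstring)
    grader = MathExpressionVerifyGrader(timeout_score=0.0)
    result = await grader.aevaluate(response="2+2", reference_response="4")
    print(result.score)  # 1.0, per the doctest above

asyncio.run(main())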

openjudge/graders/text/_utils/normalization.py

Lines changed: 20 additions & 20 deletions
@@ -176,18 +176,18 @@ def remove_punctuation(text: str, keep_chars: Optional[str] = None) -> str:
 
 def normalize_for_comparison(text: str, method: str = "standard") -> str:
     """
-    根据指定方法归一化文本以进行比较
+    Normalize text for comparison using the specified method.
 
     Args:
-        text: 待归一化的文本
-        method: 归一化方法
-            - "standard": 标准归一化(小写 + 去标点 + 去冠词)
-            - "minimal": 最小归一化(仅去多余空格)
-            - "aggressive": 激进归一化(所有选项)
-            - "case_only": 仅大小写归一化
+        text: Text to normalize.
+        method: Normalization method.
+            - "standard": Standard normalization (lowercase + remove punctuation + remove articles).
+            - "minimal": Minimal normalization (only remove extra whitespace).
+            - "aggressive": Aggressive normalization (all options enabled).
+            - "case_only": Case normalization only.
 
     Returns:
-        str: 归一化后的文本
+        str: Normalized text.
 
     Example:
         >>> normalize_for_comparison("The Cat!", "standard")
@@ -214,14 +214,14 @@ def normalize_for_comparison(text: str, method: str = "standard") -> str:
 
 def normalize_numbers(text: str, replace_with: str = " NUMBER ") -> str:
     """
-    将数字替换为占位符
+    Replace numbers with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace numbers with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with numbers replaced.
 
     Example:
         >>> normalize_numbers("I have 3 apples and 5 oranges")
@@ -232,14 +232,14 @@ def normalize_numbers(text: str, replace_with: str = " NUMBER ") -> str:
 
 def normalize_urls(text: str, replace_with: str = " URL ") -> str:
     """
-    将 URL 替换为占位符
+    Replace URLs with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace URLs with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with URLs replaced.
 
     Example:
         >>> text = "Visit https://example.com for more info"
@@ -252,14 +252,14 @@ def normalize_urls(text: str, replace_with: str = " URL ") -> str:
 
 def normalize_emails(text: str, replace_with: str = " EMAIL ") -> str:
     """
-    将邮箱地址替换为占位符
+    Replace email addresses with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace emails with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with email addresses replaced.
 
     Example:
         >>> text = "Contact me at [email protected]"

openjudge/graders/text/_utils/tokenization.py

Lines changed: 35 additions & 40 deletions
@@ -2,7 +2,7 @@
 """
 Tokenization Utilities
 
-分词工具,用于将文本分解为词元(tokens)。
+Tokenization tools for breaking text into tokens.
 """
 
 import re
@@ -11,14 +11,14 @@
 
 def simple_tokenize(text: str, lowercase: bool = False) -> List[str]:
     """
-    简单分词(基于空格)
+    Simple tokenization based on whitespace.
 
     Args:
-        text: 待分词的文本
-        lowercase: 是否转换为小写
+        text: Text to tokenize.
+        lowercase: Whether to convert to lowercase.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> simple_tokenize("Hello, world!")
@@ -35,16 +35,14 @@ def simple_tokenize(text: str, lowercase: bool = False) -> List[str]:
 
 def word_tokenize(text: str, remove_punctuation: bool = True) -> List[str]:
     """
-    单词级分词
-
-    使用正则表达式分词,可选择是否移除标点。
+    Word-level tokenization using regex.
 
     Args:
-        text: 待分词的文本
-        remove_punctuation: 是否移除标点符号
+        text: Text to tokenize.
+        remove_punctuation: Whether to remove punctuation marks.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> word_tokenize("Hello, world!")
@@ -53,25 +51,25 @@ def word_tokenize(text: str, remove_punctuation: bool = True) -> List[str]:
         ['Hello', ',', 'world', '!']
     """
     if remove_punctuation:
-        # 只保留字母、数字和空格
+        # Keep only letters, numbers, and spaces
         text = _non_word_space_pattern.sub(" ", text)
         tokens = text.split()
     else:
-        # 保留标点,但将其作为独立的token
+        # Keep punctuation as separate tokens
         tokens = _word_punctuation_pattern.findall(text)
 
     return [t for t in tokens if t.strip()]
 
 
 def character_tokenize(text: str) -> List[str]:
     """
-    字符级分词
+    Character-level tokenization.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 字符列表
+        List[str]: List of characters.
 
     Example:
         >>> character_tokenize("hello")
@@ -82,15 +80,15 @@ def character_tokenize(text: str) -> List[str]:
 
 def ngram_tokenize(text: str, n: int = 2, char_level: bool = False) -> List[str]:
     """
-    N-gram 分词
+    N-gram tokenization.
 
     Args:
-        text: 待分词的文本
-        n: N-gram 的大小
-        char_level: 是否为字符级 n-gram(否则为词级)
+        text: Text to tokenize.
+        n: Size of the n-gram.
+        char_level: Whether to use character-level n-grams (otherwise word-level).
 
     Returns:
-        List[str]: N-gram 列表
+        List[str]: List of n-grams.
 
     Example:
         >>> ngram_tokenize("hello world", n=2, char_level=True)
@@ -121,22 +119,19 @@ def ngram_tokenize(text: str, n: int = 2, char_level: bool = False) -> List[str]
 
 def sentence_tokenize(text: str) -> List[str]:
     """
-    句子分词
-
-    简单的句子分割,基于常见的句子结束符。
+    Sentence tokenization based on common sentence terminators.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 句子列表
+        List[str]: List of sentences.
 
     Example:
         >>> text = "Hello world. How are you? I'm fine!"
         >>> sentence_tokenize(text)
        ['Hello world.', 'How are you?', "I'm fine!"]
     """
-    # 简单的句子分割规则
     sentences = _sentence_split_pattern.split(text)
     return [s.strip() for s in sentences if s.strip()]
 
@@ -146,13 +141,13 @@ def sentence_tokenize(text: str) -> List[str]:
 
 def tokenize_preserving_case(text: str) -> List[str]:
     """
-    保持大小写的分词
+    Tokenization preserving original case.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> tokenize_preserving_case("Hello World")
@@ -163,13 +158,13 @@ def tokenize_preserving_case(text: str) -> List[str]:
 
 def whitespace_tokenize(text: str) -> List[str]:
     """
-    基于空白字符的分词
+    Tokenization based on whitespace characters.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> whitespace_tokenize("hello\\tworld\\ntest")
@@ -180,13 +175,13 @@ def whitespace_tokenize(text: str) -> List[str]:
 
 def get_word_count(text: str) -> int:
     """
-    获取单词数量
+    Get word count from text.
 
     Args:
-        text: 文本
+        text: Input text.
 
     Returns:
-        int: 单词数量
+        int: Number of words.
 
     Example:
         >>> get_word_count("Hello, world! How are you?")
@@ -197,14 +192,14 @@ def get_word_count(text: str) -> int:
 
 def get_character_count(text: str, include_spaces: bool = False) -> int:
     """
-    获取字符数量
+    Get character count from text.
 
     Args:
-        text: 文本
-        include_spaces: 是否包含空格
+        text: Input text.
+        include_spaces: Whether to include spaces in the count.
 
     Returns:
-        int: 字符数量
+        int: Number of characters.
 
     Example:
         >>> get_character_count("hello world")
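
To make the documented n-gram example concrete, an illustrative one-liner for the character-level case (the real ngram_tokenize also handles word-level n-grams when char_level=False; this is a sketch, not the module's code):

from typing import List

def char_ngrams(text: str, n: int = 2) -> List[str]:
    # Slide a window of width n across the raw characters, spaces included
    return [text[i:i + n] for i in range(len(text) - n + 1)]

print(char_ngrams("hello world", n=2))
# ['he', 'el', 'll', 'lo', 'o ', ' w', 'wo', 'or', 'rl', 'ld']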

openjudge/graders/text/number_accuracy.py

Lines changed: 24 additions & 3 deletions
@@ -43,7 +43,16 @@ class NumberAccuracyGrader(BaseGrader):
     """
 
     def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
-        """"""
+        """
+        Initialize NumberAccuracyGrader.
+
+        Args:
+            tolerance: Tolerance for number comparison. Default is 1e-6.
+            **kwargs: Additional keyword arguments passed to BaseGrader.
+
+        Example:
+            >>> grader = NumberAccuracyGrader(tolerance=0.01)
+        """
         super().__init__(
             name="number_accuracy",
             mode=GraderMode.POINTWISE,
@@ -55,8 +64,20 @@ def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
         self._number_pattern = re.compile(r"-?\d+\.?\d*")
 
     def _extract_numbers(self, text: str) -> List[float]:
-        """Extract numbers from text"""
-        # Match integers and floating point numbers
+        """
+        Extract numbers from text.
+
+        Args:
+            text: Input text to extract numbers from.
+
+        Returns:
+            List[float]: List of extracted numbers as floats.
+
+        Example:
+            >>> grader = NumberAccuracyGrader()
+            >>> grader._extract_numbers("The result is 3.14 and 42")
+            [3.14, 42.0]
+        """
         numbers = self._number_pattern.findall(text)
         return [float(n) for n in numbers if n]
 
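
The extraction regex shown in this hunk, r"-?\d+\.?\d*", handles signed integers and decimals but not scientific notation or thousands separators. A standalone check of the documented example:

import re

number_pattern = re.compile(r"-?\d+\.?\d*")  # same pattern as in the diff

print([float(n) for n in number_pattern.findall("The result is 3.14 and 42")])
# [3.14, 42.0]
print(number_pattern.findall("1e-6"))  # ['1', '-6'] -- scientific notation splits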

openjudge/graders/text/similarity.py

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ def __init__(
         """
         super().__init__(
             name="similarity",
-            grader_mode=GraderMode.POINTWISE,
+            mode=GraderMode.POINTWISE,
             description="Unified text similarity grader",
         )
         self.normalize = normalize

openjudge/graders/text/string_match.py

Lines changed: 1 addition & 1 deletion
@@ -123,7 +123,7 @@ def __init__(
         """
         super().__init__(
             name=name,
-            grader_mode=GraderMode.POINTWISE,
+            mode=GraderMode.POINTWISE,
             description=description,
         )
         self.case_sensitive = case_sensitive
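
The one-line fixes in similarity.py and string_match.py correct a keyword that BaseGrader evidently does not accept. A minimal sketch of the failure mode, assuming BaseGrader.__init__ declares mode (as every other grader in this commit passes) and has no catch-all for unknown keyword names:

class BaseGrader:
    # Hypothetical signature for illustration; see openjudge's actual BaseGrader.
    def __init__(self, name: str, mode: str, description: str = "") -> None:
        self.name, self.mode, self.description = name, mode, description

try:
    BaseGrader(name="similarity", grader_mode="pointwise", description="demo")
except TypeError as exc:
    print(exc)  # ... got an unexpected keyword argument 'grader_mode'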
