
Commit 1d474f6

refactor(graders,models): cleanup and improve code quality
- Refactor text graders normalization and tokenization utils
- Clean up number_accuracy, similarity, and string_match graders
- Improve math_expression_verify implementation
- Refactor openai_chat_model and qwen_vl_model
- Update zero_shot_evaluation documentation
- Add code cleanup guide documents
1 parent 3fb26aa commit 1d474f6

8 files changed: +108 −115 lines changed

openjudge/graders/math/math_expression_verify.py

Lines changed: 3 additions & 3 deletions
@@ -21,10 +21,10 @@ class MathExpressionVerifyGrader(BaseGrader):
 
     def __init__(self, timeout_score: float = 1.0, **kwargs: Any):
         """
-        Initialize the MathVerifyGrader.
+        Initialize the MathExpressionVerifyGrader.
 
         Args:
-            timeout_score: Score to assign on timeout
+            timeout_score: Score to assign on timeout or exception.
         """
         super().__init__(
             name="math_verify",
@@ -56,7 +56,7 @@ async def aevaluate(self, response: str, reference_response: str) -> GraderScore
 
         Examples:
             >>> import asyncio
-            >>> grader = MathVerifyGrader()
+            >>> grader = MathExpressionVerifyGrader()
             >>> result = asyncio.run(grader.aevaluate("2+2", "4"))
             >>> print(result.score)
             1.0
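
A quick usage sketch of the renamed grader, assuming the import path mirrors the file location above (the diff itself does not show the package exports):

# Sketch only: import path inferred from the file path in this commit,
# not confirmed by the diff.
import asyncio

from openjudge.graders.math.math_expression_verify import MathExpressionVerifyGrader

async def main() -> None:
    # timeout_score is the score assigned on timeout or exception (per the new docstring)
    grader = MathExpressionVerifyGrader(timeout_score=0.0)
    result = await grader.aevaluate(response="2+2", reference_response="4")
    print(result.score)  # 1.0, per the doctest above

asyncio.run(main())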

openjudge/graders/text/_utils/normalization.py

Lines changed: 20 additions & 20 deletions
@@ -176,18 +176,18 @@ def remove_punctuation(text: str, keep_chars: Optional[str] = None) -> str:
 
 def normalize_for_comparison(text: str, method: str = "standard") -> str:
     """
-    根据指定方法归一化文本以进行比较
+    Normalize text for comparison using the specified method.
 
     Args:
-        text: 待归一化的文本
-        method: 归一化方法
-            - "standard": 标准归一化(小写 + 去标点 + 去冠词)
-            - "minimal": 最小归一化(仅去多余空格)
-            - "aggressive": 激进归一化(所有选项)
-            - "case_only": 仅大小写归一化
+        text: Text to normalize.
+        method: Normalization method.
+            - "standard": Standard normalization (lowercase + remove punctuation + remove articles).
+            - "minimal": Minimal normalization (only remove extra whitespace).
+            - "aggressive": Aggressive normalization (all options enabled).
+            - "case_only": Case normalization only.
 
     Returns:
-        str: 归一化后的文本
+        str: Normalized text.
 
     Example:
         >>> normalize_for_comparison("The Cat!", "standard")
@@ -214,14 +214,14 @@ def normalize_for_comparison(text: str, method: str = "standard") -> str:
 
 def normalize_numbers(text: str, replace_with: str = " NUMBER ") -> str:
     """
-    将数字替换为占位符
+    Replace numbers with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace numbers with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with numbers replaced.
 
     Example:
         >>> normalize_numbers("I have 3 apples and 5 oranges")
@@ -232,14 +232,14 @@ def normalize_numbers(text: str, replace_with: str = " NUMBER ") -> str:
 
 def normalize_urls(text: str, replace_with: str = " URL ") -> str:
     """
-    将 URL 替换为占位符
+    Replace URLs with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace URLs with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with URLs replaced.
 
     Example:
         >>> text = "Visit https://example.com for more info"
@@ -252,14 +252,14 @@ def normalize_urls(text: str, replace_with: str = " URL ") -> str:
 
 def normalize_emails(text: str, replace_with: str = " EMAIL ") -> str:
     """
-    将邮箱地址替换为占位符
+    Replace email addresses with a placeholder.
 
     Args:
-        text: 待处理的文本
-        replace_with: 替换的占位符
+        text: Text to process.
+        replace_with: Placeholder string to replace emails with.
 
     Returns:
-        str: 替换后的文本
+        str: Text with email addresses replaced.
 
     Example:
         >>> text = "Contact me at [email protected]"

openjudge/graders/text/_utils/tokenization.py

Lines changed: 35 additions & 40 deletions
@@ -2,7 +2,7 @@
 """
 Tokenization Utilities
 
-分词工具,用于将文本分解为词元(tokens)。
+Tokenization tools for breaking text into tokens.
 """
 
 import re
@@ -11,14 +11,14 @@
 
 def simple_tokenize(text: str, lowercase: bool = False) -> List[str]:
     """
-    简单分词(基于空格)
+    Simple tokenization based on whitespace.
 
     Args:
-        text: 待分词的文本
-        lowercase: 是否转换为小写
+        text: Text to tokenize.
+        lowercase: Whether to convert to lowercase.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> simple_tokenize("Hello, world!")
@@ -35,16 +35,14 @@ def simple_tokenize(text: str, lowercase: bool = False) -> List[str]:
 
 def word_tokenize(text: str, remove_punctuation: bool = True) -> List[str]:
     """
-    单词级分词
-
-    使用正则表达式分词,可选择是否移除标点。
+    Word-level tokenization using regex.
 
     Args:
-        text: 待分词的文本
-        remove_punctuation: 是否移除标点符号
+        text: Text to tokenize.
+        remove_punctuation: Whether to remove punctuation marks.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> word_tokenize("Hello, world!")
@@ -53,25 +51,25 @@ def word_tokenize(text: str, remove_punctuation: bool = True) -> List[str]:
         ['Hello', ',', 'world', '!']
     """
     if remove_punctuation:
-        # 只保留字母、数字和空格
+        # Keep only letters, numbers, and spaces
         text = _non_word_space_pattern.sub(" ", text)
         tokens = text.split()
     else:
-        # 保留标点,但将其作为独立的token
+        # Keep punctuation as separate tokens
         tokens = _word_punctuation_pattern.findall(text)
 
     return [t for t in tokens if t.strip()]
 
 
 def character_tokenize(text: str) -> List[str]:
     """
-    字符级分词
+    Character-level tokenization.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 字符列表
+        List[str]: List of characters.
 
     Example:
         >>> character_tokenize("hello")
@@ -82,15 +80,15 @@ def character_tokenize(text: str) -> List[str]:
 
 def ngram_tokenize(text: str, n: int = 2, char_level: bool = False) -> List[str]:
     """
-    N-gram 分词
+    N-gram tokenization.
 
     Args:
-        text: 待分词的文本
-        n: N-gram 的大小
-        char_level: 是否为字符级 n-gram(否则为词级)
+        text: Text to tokenize.
+        n: Size of the n-gram.
+        char_level: Whether to use character-level n-grams (otherwise word-level).
 
     Returns:
-        List[str]: N-gram 列表
+        List[str]: List of n-grams.
 
     Example:
         >>> ngram_tokenize("hello world", n=2, char_level=True)
@@ -121,22 +119,19 @@ def ngram_tokenize(text: str, n: int = 2, char_level: bool = False) -> List[str]
 
 def sentence_tokenize(text: str) -> List[str]:
     """
-    句子分词
-
-    简单的句子分割,基于常见的句子结束符。
+    Sentence tokenization based on common sentence terminators.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 句子列表
+        List[str]: List of sentences.
 
     Example:
         >>> text = "Hello world. How are you? I'm fine!"
         >>> sentence_tokenize(text)
        ['Hello world.', 'How are you?', "I'm fine!"]
     """
-    # 简单的句子分割规则
     sentences = _sentence_split_pattern.split(text)
     return [s.strip() for s in sentences if s.strip()]
 
@@ -146,13 +141,13 @@ def sentence_tokenize(text: str) -> List[str]:
 
 def tokenize_preserving_case(text: str) -> List[str]:
     """
-    保持大小写的分词
+    Tokenization preserving original case.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> tokenize_preserving_case("Hello World")
@@ -163,13 +158,13 @@ def tokenize_preserving_case(text: str) -> List[str]:
 
 def whitespace_tokenize(text: str) -> List[str]:
     """
-    基于空白字符的分词
+    Tokenization based on whitespace characters.
 
     Args:
-        text: 待分词的文本
+        text: Text to tokenize.
 
     Returns:
-        List[str]: 词元列表
+        List[str]: List of tokens.
 
     Example:
         >>> whitespace_tokenize("hello\\tworld\\ntest")
@@ -180,13 +175,13 @@ def whitespace_tokenize(text: str) -> List[str]:
 
 def get_word_count(text: str) -> int:
     """
-    获取单词数量
+    Get word count from text.
 
     Args:
-        text: 文本
+        text: Input text.
 
     Returns:
-        int: 单词数量
+        int: Number of words.
 
     Example:
         >>> get_word_count("Hello, world! How are you?")
@@ -197,14 +192,14 @@ def get_word_count(text: str) -> int:
 
 def get_character_count(text: str, include_spaces: bool = False) -> int:
     """
-    获取字符数量
+    Get character count from text.
 
     Args:
-        text: 文本
-        include_spaces: 是否包含空格
+        text: Input text.
+        include_spaces: Whether to include spaces in the count.
 
     Returns:
-        int: 字符数量
+        int: Number of characters.
 
     Example:
         >>> get_character_count("hello world")
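
To make the documented n-gram example concrete, an illustrative one-liner for the character-level case (the real ngram_tokenize also handles word-level n-grams when char_level=False; this is a sketch, not the module's code):

from typing import List

def char_ngrams(text: str, n: int = 2) -> List[str]:
    # Slide a window of width n across the raw characters, spaces included
    return [text[i:i + n] for i in range(len(text) - n + 1)]

print(char_ngrams("hello world", n=2))
# ['he', 'el', 'll', 'lo', 'o ', ' w', 'wo', 'or', 'rl', 'ld']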

openjudge/graders/text/number_accuracy.py

Lines changed: 24 additions & 3 deletions
@@ -43,7 +43,16 @@ class NumberAccuracyGrader(BaseGrader):
     """
 
     def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
-        """"""
+        """
+        Initialize NumberAccuracyGrader.
+
+        Args:
+            tolerance: Tolerance for number comparison. Default is 1e-6.
+            **kwargs: Additional keyword arguments passed to BaseGrader.
+
+        Example:
+            >>> grader = NumberAccuracyGrader(tolerance=0.01)
+        """
         super().__init__(
             name="number_accuracy",
             mode=GraderMode.POINTWISE,
@@ -55,8 +64,20 @@ def __init__(self, tolerance: float = 1e-6, **kwargs: Any) -> None:
         self._number_pattern = re.compile(r"-?\d+\.?\d*")
 
     def _extract_numbers(self, text: str) -> List[float]:
-        """Extract numbers from text"""
-        # Match integers and floating point numbers
+        """
+        Extract numbers from text.
+
+        Args:
+            text: Input text to extract numbers from.
+
+        Returns:
+            List[float]: List of extracted numbers as floats.
+
+        Example:
+            >>> grader = NumberAccuracyGrader()
+            >>> grader._extract_numbers("The result is 3.14 and 42")
+            [3.14, 42.0]
+        """
         numbers = self._number_pattern.findall(text)
         return [float(n) for n in numbers if n]
 
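
The extraction regex shown in this hunk, r"-?\d+\.?\d*", handles signed integers and decimals but not scientific notation or thousands separators. A standalone check of the documented example:

import re

number_pattern = re.compile(r"-?\d+\.?\d*")  # same pattern as in the diff

print([float(n) for n in number_pattern.findall("The result is 3.14 and 42")])
# [3.14, 42.0]
print(number_pattern.findall("1e-6"))  # ['1', '-6'] -- scientific notation splits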

openjudge/graders/text/similarity.py

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ def __init__(
         """
         super().__init__(
             name="similarity",
-            grader_mode=GraderMode.POINTWISE,
+            mode=GraderMode.POINTWISE,
             description="Unified text similarity grader",
         )
         self.normalize = normalize

openjudge/graders/text/string_match.py

Lines changed: 1 addition & 1 deletion
@@ -123,7 +123,7 @@ def __init__(
         """
         super().__init__(
             name=name,
-            grader_mode=GraderMode.POINTWISE,
+            mode=GraderMode.POINTWISE,
             description=description,
         )
         self.case_sensitive = case_sensitive
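
The one-line fixes in similarity.py and string_match.py correct a keyword that BaseGrader evidently does not accept. A minimal sketch of the failure mode, assuming BaseGrader.__init__ declares mode (as every other grader in this commit passes) and has no catch-all for unknown keyword names:

class BaseGrader:
    # Hypothetical signature for illustration; see openjudge's actual BaseGrader.
    def __init__(self, name: str, mode: str, description: str = "") -> None:
        self.name, self.mode, self.description = name, mode, description

try:
    BaseGrader(name="similarity", grader_mode="pointwise", description="demo")
except TypeError as exc:
    print(exc)  # ... got an unexpected keyword argument 'grader_mode'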
