Skip to content

Commit c884a74

Browse files
authored
Merge pull request #9 from e06084/main
tests: update metrics test
2 parents 592d040 + 8f2009e commit c884a74

File tree

1 file changed

+36
-29
lines changed

1 file changed

+36
-29
lines changed

tests/test_metrics.py

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ def test_code_edit_metric(self):
9191
code_result = results['code_edit']
9292
self.assertTrue(code_result.success)
9393
self.assertIsInstance(code_result.score, float)
94-
self.assertGreaterEqual(code_result.score, 0.0)
95-
self.assertLessEqual(code_result.score, 1.0)
94+
# 验证固定内容的确定分数
95+
self.assertAlmostEqual(code_result.score, 0.918367, places=5,
96+
msg=f"code_edit分数应该是0.918367,实际: {code_result.score}")
9697

9798
# 验证详细信息
9899
self.assertEqual(code_result.details['content_type'], 'code')
@@ -110,8 +111,9 @@ def test_formula_edit_metric(self):
110111
formula_result = results['formula_edit']
111112
self.assertTrue(formula_result.success)
112113
self.assertIsInstance(formula_result.score, float)
113-
self.assertGreaterEqual(formula_result.score, 0.0)
114-
self.assertLessEqual(formula_result.score, 1.0)
114+
# 验证固定内容的确定分数
115+
self.assertAlmostEqual(formula_result.score, 1.000000, places=5,
116+
msg=f"formula_edit分数应该是1.000000,实际: {formula_result.score}")
115117

116118
# 验证详细信息
117119
self.assertEqual(formula_result.details['content_type'], 'formula')
@@ -127,8 +129,9 @@ def test_table_edit_metric(self):
127129
table_result = results['table_edit']
128130
self.assertTrue(table_result.success)
129131
self.assertIsInstance(table_result.score, float)
130-
self.assertGreaterEqual(table_result.score, 0.0)
131-
self.assertLessEqual(table_result.score, 1.0)
132+
# 验证固定内容的确定分数
133+
self.assertAlmostEqual(table_result.score, 0.868852, places=5,
134+
msg=f"table_edit分数应该是0.868852,实际: {table_result.score}")
132135

133136
# 验证详细信息
134137
self.assertEqual(table_result.details['content_type'], 'table')
@@ -144,8 +147,9 @@ def test_table_teds_metric(self):
144147
teds_result = results['table_TEDS']
145148
self.assertTrue(teds_result.success)
146149
self.assertIsInstance(teds_result.score, float)
147-
self.assertGreaterEqual(teds_result.score, 0.0)
148-
self.assertLessEqual(teds_result.score, 1.0)
150+
# 验证固定内容的确定分数
151+
self.assertAlmostEqual(teds_result.score, 0.300000, places=5,
152+
msg=f"table_TEDS分数应该是0.300000,实际: {teds_result.score}")
149153

150154
# 验证详细信息
151155
self.assertEqual(teds_result.details['content_type'], 'table')
@@ -160,8 +164,9 @@ def test_text_edit_metric(self):
160164
text_result = results['text_edit']
161165
self.assertTrue(text_result.success)
162166
self.assertIsInstance(text_result.score, float)
163-
self.assertGreaterEqual(text_result.score, 0.0)
164-
self.assertLessEqual(text_result.score, 1.0)
167+
# 验证固定内容的确定分数
168+
self.assertAlmostEqual(text_result.score, 0.769231, places=5,
169+
msg=f"text_edit分数应该是0.769231,实际: {text_result.score}")
165170

166171
# 验证详细信息
167172
self.assertEqual(text_result.details['content_type'], 'text')
@@ -204,11 +209,11 @@ def test_identical_content(self):
204209
groundtruth_content=self.groundtruth_content
205210
)
206211

207-
# 大部分指标应该得到完美分数(1.0),除了可能某些算法有特殊处理
212+
# 完全相同的内容应该得到满分
208213
for metric_name in ['code_edit', 'formula_edit', 'table_edit', 'text_edit']:
209214
if metric_name in results and results[metric_name].success:
210-
self.assertGreaterEqual(results[metric_name].score, 0.8,
211-
f"相同内容的{metric_name}分数应该很高")
215+
self.assertAlmostEqual(results[metric_name].score, 1.0, places=5,
216+
msg=f"相同内容的{metric_name}应该得到满分,实际: {results[metric_name].score}")
212217

213218
def test_empty_content(self):
214219
"""测试空内容的情况"""
@@ -224,10 +229,6 @@ def test_empty_content(self):
224229
f"空内容的{metric_name}应该正确处理")
225230

226231

227-
228-
229-
230-
231232
class TestErrorHandling(unittest.TestCase):
232233
"""测试错误处理"""
233234

@@ -296,16 +297,17 @@ def hello_world():
296297
groundtruth_content=groundtruth
297298
)
298299

299-
# 验证文本编辑距离
300+
# 验证文本编辑距离(固定内容应该有确定分数)
300301
self.assertIn("text_edit", results)
301302
self.assertTrue(results["text_edit"].success)
302-
# 基于实际测试结果调整期望值
303-
self.assertGreater(results["text_edit"].score, 0.50)
303+
self.assertAlmostEqual(results["text_edit"].score, 1.000000, places=5,
304+
msg=f"text_edit分数应该是1.000000,实际: {results['text_edit'].score}")
304305

305-
# 验证代码编辑距离(代码内容完全一致,应该有高分
306+
# 验证代码编辑距离(缺少python标识符导致轻微差异)
306307
self.assertIn("code_edit", results)
307308
self.assertTrue(results["code_edit"].success)
308-
self.assertGreater(results["code_edit"].score, 0.90)
309+
self.assertAlmostEqual(results["code_edit"].score, 0.905797, places=5,
310+
msg=f"code_edit分数应该是0.905797,实际: {results['code_edit'].score}")
309311

310312
def test_table_sample_edit_distance(self):
311313
"""测试表格样本的编辑距离"""
@@ -328,15 +330,17 @@ def test_table_sample_edit_distance(self):
328330
groundtruth_content=groundtruth
329331
)
330332

331-
# 验证表格编辑距离(应该接近0.9022
333+
# 验证表格编辑距离(分隔符长度差异导致的固定分数)
332334
self.assertIn("table_edit", results)
333335
self.assertTrue(results["table_edit"].success)
334-
self.assertGreater(results["table_edit"].score, 0.85)
336+
self.assertAlmostEqual(results["table_edit"].score, 0.888889, places=5,
337+
msg=f"table_edit分数应该是0.888889,实际: {results['table_edit'].score}")
335338

336-
# 验证TEDS指标(表格结构相同,应该满分
339+
# 验证TEDS指标(表格结构完全相同,满分)
337340
self.assertIn("table_TEDS", results)
338341
self.assertTrue(results["table_TEDS"].success)
339-
self.assertGreater(results["table_TEDS"].score, 0.95)
342+
self.assertAlmostEqual(results["table_TEDS"].score, 1.000000, places=5,
343+
msg=f"table_TEDS分数应该是1.000000,实际: {results['table_TEDS'].score}")
340344

341345
def test_formula_sample_edit_distance(self):
342346
"""测试公式样本的编辑距离"""
@@ -361,14 +365,17 @@ def test_formula_sample_edit_distance(self):
361365
groundtruth_content=groundtruth
362366
)
363367

364-
# 验证公式编辑距离(符号转义导致分数较低
368+
# 验证公式编辑距离(符号转义导致的固定低分)
365369
self.assertIn("formula_edit", results)
366370
self.assertTrue(results["formula_edit"].success)
367-
self.assertGreater(results["formula_edit"].score, 0.10)
371+
self.assertAlmostEqual(results["formula_edit"].score, 0.122807, places=5,
372+
msg=f"formula_edit分数应该是0.122807,实际: {results['formula_edit'].score}")
368373

369-
# 验证文本编辑距离(去除公式后的纯文本)
374+
# 验证文本编辑距离(去除公式后的纯文本,也受符号转义影响)
370375
self.assertIn("text_edit", results)
371376
self.assertTrue(results["text_edit"].success)
377+
self.assertAlmostEqual(results["text_edit"].score, 0.372093, places=5,
378+
msg=f"text_edit分数应该是0.372093,实际: {results['text_edit'].score}")
372379

373380
def test_overall_score_calculation(self):
374381
"""测试综合分数计算"""

0 commit comments

Comments
 (0)