@@ -91,8 +91,9 @@ def test_code_edit_metric(self):
9191 code_result = results ['code_edit' ]
9292 self .assertTrue (code_result .success )
9393 self .assertIsInstance (code_result .score , float )
94- self .assertGreaterEqual (code_result .score , 0.0 )
95- self .assertLessEqual (code_result .score , 1.0 )
94+ # 验证固定内容的确定分数
95+ self .assertAlmostEqual (code_result .score , 0.918367 , places = 5 ,
96+ msg = f"code_edit分数应该是0.918367,实际: { code_result .score } " )
9697
9798 # 验证详细信息
9899 self .assertEqual (code_result .details ['content_type' ], 'code' )
@@ -110,8 +111,9 @@ def test_formula_edit_metric(self):
110111 formula_result = results ['formula_edit' ]
111112 self .assertTrue (formula_result .success )
112113 self .assertIsInstance (formula_result .score , float )
113- self .assertGreaterEqual (formula_result .score , 0.0 )
114- self .assertLessEqual (formula_result .score , 1.0 )
114+ # 验证固定内容的确定分数
115+ self .assertAlmostEqual (formula_result .score , 1.000000 , places = 5 ,
116+ msg = f"formula_edit分数应该是1.000000,实际: { formula_result .score } " )
115117
116118 # 验证详细信息
117119 self .assertEqual (formula_result .details ['content_type' ], 'formula' )
@@ -127,8 +129,9 @@ def test_table_edit_metric(self):
127129 table_result = results ['table_edit' ]
128130 self .assertTrue (table_result .success )
129131 self .assertIsInstance (table_result .score , float )
130- self .assertGreaterEqual (table_result .score , 0.0 )
131- self .assertLessEqual (table_result .score , 1.0 )
132+ # 验证固定内容的确定分数
133+ self .assertAlmostEqual (table_result .score , 0.868852 , places = 5 ,
134+ msg = f"table_edit分数应该是0.868852,实际: { table_result .score } " )
132135
133136 # 验证详细信息
134137 self .assertEqual (table_result .details ['content_type' ], 'table' )
@@ -144,8 +147,9 @@ def test_table_teds_metric(self):
144147 teds_result = results ['table_TEDS' ]
145148 self .assertTrue (teds_result .success )
146149 self .assertIsInstance (teds_result .score , float )
147- self .assertGreaterEqual (teds_result .score , 0.0 )
148- self .assertLessEqual (teds_result .score , 1.0 )
150+ # 验证固定内容的确定分数
151+ self .assertAlmostEqual (teds_result .score , 0.300000 , places = 5 ,
152+ msg = f"table_TEDS分数应该是0.300000,实际: { teds_result .score } " )
149153
150154 # 验证详细信息
151155 self .assertEqual (teds_result .details ['content_type' ], 'table' )
@@ -160,8 +164,9 @@ def test_text_edit_metric(self):
160164 text_result = results ['text_edit' ]
161165 self .assertTrue (text_result .success )
162166 self .assertIsInstance (text_result .score , float )
163- self .assertGreaterEqual (text_result .score , 0.0 )
164- self .assertLessEqual (text_result .score , 1.0 )
167+ # 验证固定内容的确定分数
168+ self .assertAlmostEqual (text_result .score , 0.769231 , places = 5 ,
169+ msg = f"text_edit分数应该是0.769231,实际: { text_result .score } " )
165170
166171 # 验证详细信息
167172 self .assertEqual (text_result .details ['content_type' ], 'text' )
@@ -204,11 +209,11 @@ def test_identical_content(self):
204209 groundtruth_content = self .groundtruth_content
205210 )
206211
207- # 大部分指标应该得到完美分数(1.0),除了可能某些算法有特殊处理
212+ # 完全相同的内容应该得到满分
208213 for metric_name in ['code_edit' , 'formula_edit' , 'table_edit' , 'text_edit' ]:
209214 if metric_name in results and results [metric_name ].success :
210- self .assertGreaterEqual (results [metric_name ].score , 0.8 ,
211- f"相同内容的{ metric_name } 分数应该很高 " )
215+ self .assertAlmostEqual (results [metric_name ].score , 1.0 , places = 5 ,
216+ msg = f"相同内容的{ metric_name } 应该得到满分,实际: { results [ metric_name ]. score } " )
212217
213218 def test_empty_content (self ):
214219 """测试空内容的情况"""
@@ -224,10 +229,6 @@ def test_empty_content(self):
224229 f"空内容的{ metric_name } 应该正确处理" )
225230
226231
227-
228-
229-
230-
231232class TestErrorHandling (unittest .TestCase ):
232233 """测试错误处理"""
233234
@@ -296,16 +297,17 @@ def hello_world():
296297 groundtruth_content = groundtruth
297298 )
298299
299- # 验证文本编辑距离
300+ # 验证文本编辑距离(固定内容应该有确定分数)
300301 self .assertIn ("text_edit" , results )
301302 self .assertTrue (results ["text_edit" ].success )
302- # 基于实际测试结果调整期望值
303- self . assertGreater ( results [" text_edit" ].score , 0.50 )
303+ self . assertAlmostEqual ( results [ "text_edit" ]. score , 1.000000 , places = 5 ,
304+ msg = f"text_edit分数应该是1.000000,实际: { results [' text_edit' ].score } " )
304305
305- # 验证代码编辑距离(代码内容完全一致,应该有高分 )
306+ # 验证代码编辑距离(缺少python标识符导致轻微差异 )
306307 self .assertIn ("code_edit" , results )
307308 self .assertTrue (results ["code_edit" ].success )
308- self .assertGreater (results ["code_edit" ].score , 0.90 )
309+ self .assertAlmostEqual (results ["code_edit" ].score , 0.905797 , places = 5 ,
310+ msg = f"code_edit分数应该是0.905797,实际: { results ['code_edit' ].score } " )
309311
310312 def test_table_sample_edit_distance (self ):
311313 """测试表格样本的编辑距离"""
@@ -328,15 +330,17 @@ def test_table_sample_edit_distance(self):
328330 groundtruth_content = groundtruth
329331 )
330332
331- # 验证表格编辑距离(应该接近0.9022 )
333+ # 验证表格编辑距离(分隔符长度差异导致的固定分数 )
332334 self .assertIn ("table_edit" , results )
333335 self .assertTrue (results ["table_edit" ].success )
334- self .assertGreater (results ["table_edit" ].score , 0.85 )
336+ self .assertAlmostEqual (results ["table_edit" ].score , 0.888889 , places = 5 ,
337+ msg = f"table_edit分数应该是0.888889,实际: { results ['table_edit' ].score } " )
335338
336- # 验证TEDS指标(表格结构相同,应该满分 )
339+ # 验证TEDS指标(表格结构完全相同,满分 )
337340 self .assertIn ("table_TEDS" , results )
338341 self .assertTrue (results ["table_TEDS" ].success )
339- self .assertGreater (results ["table_TEDS" ].score , 0.95 )
342+ self .assertAlmostEqual (results ["table_TEDS" ].score , 1.000000 , places = 5 ,
343+ msg = f"table_TEDS分数应该是1.000000,实际: { results ['table_TEDS' ].score } " )
340344
341345 def test_formula_sample_edit_distance (self ):
342346 """测试公式样本的编辑距离"""
@@ -361,14 +365,17 @@ def test_formula_sample_edit_distance(self):
361365 groundtruth_content = groundtruth
362366 )
363367
364- # 验证公式编辑距离(符号转义导致分数较低 )
368+ # 验证公式编辑距离(符号转义导致的固定低分 )
365369 self .assertIn ("formula_edit" , results )
366370 self .assertTrue (results ["formula_edit" ].success )
367- self .assertGreater (results ["formula_edit" ].score , 0.10 )
371+ self .assertAlmostEqual (results ["formula_edit" ].score , 0.122807 , places = 5 ,
372+ msg = f"formula_edit分数应该是0.122807,实际: { results ['formula_edit' ].score } " )
368373
369- # 验证文本编辑距离(去除公式后的纯文本)
374+ # 验证文本编辑距离(去除公式后的纯文本,也受符号转义影响 )
370375 self .assertIn ("text_edit" , results )
371376 self .assertTrue (results ["text_edit" ].success )
377+ self .assertAlmostEqual (results ["text_edit" ].score , 0.372093 , places = 5 ,
378+ msg = f"text_edit分数应该是0.372093,实际: { results ['text_edit' ].score } " )
372379
373380 def test_overall_score_calculation (self ):
374381 """测试综合分数计算"""
0 commit comments