@@ -383,12 +383,12 @@ def _extract_content(self, html, url=None):
383383 results_dir .mkdir (exist_ok = True )
384384
385385 results_path = results_dir / "evaluation_results.json"
386- DataSaver .save_evaluation_results (result . to_dict () , results_path )
386+ DataSaver .save_evaluation_results (result , results_path )
387387 print (f"\n 结果已保存到: { results_path } " )
388388
389389 # 10. 生成报告
390390 report_path = results_dir / "evaluation_report.csv"
391- DataSaver .save_summary_report (result . to_dict () , report_path )
391+ DataSaver .save_summary_report (result , report_path )
392392 print (f"报告已保存到: { report_path } " )
393393
394394
@@ -458,10 +458,251 @@ def _extract_content(self, html, url=None):
458458 print (f"\n 📊 榜单已保存到: { leaderboard_path } " )
459459
460460
# Checkpoint used when the caller does not supply ``model_path``.
_DEFAULT_LLM_WEBKIT_MODEL_PATH = "/Users/chupei/model/checkpoint-3296"

# Display layout of the six target metrics:
# (section header, ((metric key, human-readable label), ...)).
# This table is the single source of truth for both the availability listing
# and the result printout, so the two can never drift apart.
_LLM_WEBKIT_METRIC_GROUPS = (
    ("\n 🏆 综合指标:", (("overall", "综合得分"),)),
    (
        "\n 📝 文本相关指标:",
        (("text_edit", "文本编辑距离"), ("code_edit", "代码编辑距离")),
    ),
    (
        "\n 📊 表格相关指标:",
        (("table_edit", "表格编辑距离"), ("table_TEDS", "表格结构相似度")),
    ),
    ("\n 🧮 公式相关指标:", (("formula_edit", "公式编辑距离"),)),
)


def _build_llm_webkit_samples():
    """Return the three demo samples (text + code, table, formula)."""
    # NOTE(review): the literal text of every fixture below is kept exactly as
    # it appears in the reviewed source, including whitespace inside the code
    # and formula strings -- confirm it matches the intended ground truth.
    text_code_sample = DataSample(
        id="text_code_sample",
        html="""
<html>
<body>
<h1>Python编程示例</h1>
<p>这是一段关于Python编程的介绍文本。</p>
<pre><code>
def hello_world():
print("Hello, World!")
return True
</code></pre>
<p>以上代码展示了一个简单的Python函数。</p>
</body>
</html>
""",
        groundtruth_content="""# Python编程示例

这是一段关于Python编程的介绍文本。

```python
def hello_world():
print("Hello, World!")
return True
```

以上代码展示了一个简单的Python函数。""",
        groundtruth_content_list=[
            {"type": "heading", "content": "Python编程示例", "level": 1},
            {"type": "text", "content": "这是一段关于Python编程的介绍文本。"},
            {
                "type": "code",
                "content": "def hello_world():\n print(\" Hello, World!\" )\n return True",
                "language": "python",
            },
            {"type": "text", "content": "以上代码展示了一个简单的Python函数。"},
        ],
    )

    table_sample = DataSample(
        id="table_sample",
        html="""
<html>
<body>
<h2>销售数据统计</h2>
<table>
<thead>
<tr>
<th>产品</th>
<th>销量</th>
<th>收入</th>
</tr>
</thead>
<tbody>
<tr>
<td>产品A</td>
<td>100</td>
<td>1000</td>
</tr>
<tr>
<td>产品B</td>
<td>200</td>
<td>3000</td>
</tr>
</tbody>
</table>
</body>
</html>
""",
        groundtruth_content="""## 销售数据统计

| 产品 | 销量 | 收入 |
|------|------|------|
| 产品A | 100 | 1000 |
| 产品B | 200 | 3000 |""",
        groundtruth_content_list=[
            {"type": "heading", "content": "销售数据统计", "level": 2},
            {
                "type": "table",
                "content": "| 产品 | 销量 | 收入 |\n |------|------|------|\n | 产品A | 100 | 1000 |\n | 产品B | 200 | 3000 |",
            },
        ],
    )

    formula_sample = DataSample(
        id="formula_sample",
        html="""
<html>
<body>
<h2>数学公式示例</h2>
<p>这是一个行内公式: $E = mc^2$</p>
<p>这是一个行间公式:</p>
<div>$$\\ int_{-\\ infty}^{\\ infty} e^{-x^2} dx = \\ sqrt{\\ pi}$$</div>
</body>
</html>
""",
        groundtruth_content="""## 数学公式示例

这是一个行内公式: $E = mc^2$

这是一个行间公式:

$$\\ int_{-\\ infty}^{\\ infty} e^{-x^2} dx = \\ sqrt{\\ pi}$$""",
        groundtruth_content_list=[
            {"type": "heading", "content": "数学公式示例", "level": 2},
            {"type": "text", "content": "这是一个行内公式: $E = mc^2$"},
            {"type": "text", "content": "这是一个行间公式:"},
            {
                "type": "formula",
                "content": "\\ int_{-\\ infty}^{\\ infty} e^{-x^2} dx = \\ sqrt{\\ pi}",
            },
        ],
    )

    return [text_code_sample, table_sample, formula_sample]


def _print_metric_groups(metrics):
    """Print each target metric under its section header, or mark it as not computed."""
    for header, entries in _LLM_WEBKIT_METRIC_GROUPS:
        print(header)
        for key, label in entries:
            if key in metrics:
                print(f" {key} ({label}): {metrics[key]:.4f} ")
            else:
                print(f" {key}: 未计算")


def demo_llm_webkit_evaluation(model_path: str = _DEFAULT_LLM_WEBKIT_MODEL_PATH):
    """Demonstrate the six-metric evaluation of the LLM-WebKit extractor.

    Builds a three-sample dataset, creates the ``"llm-webkit"`` extractor via
    ``ExtractorFactory``, runs ``Evaluator.evaluate`` over every sample, prints
    the six target metrics read from ``overall_metrics`` plus success/failure
    counts, and writes a JSON result file and a CSV report under ``results/``.

    Args:
        model_path: Value passed to the extractor as ``config["model_path"]``.
            Defaults to the path that used to be hard-coded here, so existing
            zero-argument calls behave as before.
    """
    print("=== LLM-WebKit Extractor 6项指标评测示例 ===\n ")

    # Configure logging before any framework component starts emitting.
    setup_logging(level="INFO")

    # 1. Build a dataset that covers several content types.
    print("1. 创建包含多种内容类型的测试数据集...")
    dataset = BenchmarkDataset(
        name="llm_webkit_test",
        description="LLM-WebKit 6项指标测试数据集",
    )
    for sample in _build_llm_webkit_samples():
        dataset.add_sample(sample)

    print(f"测试数据集包含 {len(dataset)} 个样本")
    print("样本类型: 文本+代码, 表格, 公式\n ")

    # 2. Create the LLM-WebKit extractor.
    print("2. 创建LLM-WebKit抽取器...")

    # List every registered extractor so a missing registration is easy to spot.
    available_extractors = ExtractorFactory.list_available()
    print(f"可用的抽取器: {available_extractors} ")

    config = {"model_path": model_path}
    extractor = ExtractorFactory.create("llm-webkit", config=config)
    print(f"✅ LLM-WebKit抽取器创建成功,模型路径: {config['model_path']} ")

    print()

    # 3. Create the evaluator and report which target metrics are registered.
    print("3. 创建评测器...")
    evaluator = Evaluator()
    available_metrics = evaluator.metric_calculator.list_available_metrics()
    print(f"✅ 可用的评测指标 ({len(available_metrics)} 项):")

    for _header, entries in _LLM_WEBKIT_METRIC_GROUPS:
        for metric, _label in entries:
            if metric in available_metrics:
                print(f" ✅ {metric} ")
            else:
                print(f" ❌ {metric} (未注册)")

    print()

    # 4. Run the evaluation over all samples (max_samples=None means no cap).
    print("4. 开始评测...")
    print("=" * 60)

    result = evaluator.evaluate(
        dataset=dataset,
        extractor=extractor,
        max_samples=None,
    )

    # 5. Show the six metrics in detail.
    print("\n 5. 📊 6项指标详细评测结果:")
    print("=" * 60)

    results_dict = result.to_dict()
    _print_metric_groups(results_dict.get('overall_metrics', {}))

    print("\n 📈 详细统计:")
    print(f" 总样本数: {len(dataset)} ")
    success_count = sum(
        1
        for sample_result in results_dict.get('sample_results', [])
        if sample_result.get('extraction_success', False)
    )
    failure_count = len(dataset) - success_count
    print(f" 成功样本数: {success_count} ")
    print(f" 失败样本数: {failure_count} ")

    # 6. Persist the results.
    print("\n " + "=" * 60)
    print("6. 保存评测结果...")

    results_dir = Path("results")
    results_dir.mkdir(exist_ok=True)

    # The result object itself (not result.to_dict()) is handed to DataSaver.
    results_path = results_dir / "llm_webkit_evaluation_results.json"
    DataSaver.save_evaluation_results(result, results_path)
    print(f"✅ 详细结果已保存到: {results_path} ")

    report_path = results_dir / "llm_webkit_evaluation_report.csv"
    DataSaver.save_summary_report(result, report_path)
    print(f"✅ CSV报告已保存到: {report_path} ")

    print("\n " + "=" * 60)
    print("✅ LLM-WebKit 6项指标评测完成!")
700+
701+
461702if __name__ == "__main__" :
462703 try :
463704 demo_basic_evaluation ()
464- # demo_extractor_comparison()
705+ # demo_llm_webkit_evaluation() # 使用新的LLM-WebKit评测示例
465706 print ("\n ✅ 示例运行完成!" )
466707
467708 except Exception as e :
0 commit comments