1+ #!/usr/bin/env python3
2+ """
3+ WebMainBench 基本使用示例
4+ """
5+
6+ import json
7+ from pathlib import Path
8+
9+ # 导入 WebMainBench 模块
10+ from webmainbench import (
11+ DataLoader , DataSaver , BenchmarkDataset , DataSample ,
12+ ExtractorFactory , MainHTMLEvaluator ,
13+ format_results , setup_logging
14+ )
15+
16+
def load_benchdata(dataset_path: str) -> "BenchmarkDataset":
    """Load the benchmark dataset stored as JSONL at ``dataset_path``.

    The file is read with ``DataLoader.load_jsonl(..., include_results=False)``
    and the returned dataset's ``name`` and ``description`` attributes are then
    overwritten with the fixed labels used by this example.

    Args:
        dataset_path: Location of the JSONL dataset file.

    Returns:
        The dataset object returned by ``DataLoader.load_jsonl``.

    Raises:
        FileNotFoundError: If ``dataset_path`` does not exist.
    """
    dataset_path = Path(dataset_path)
    print(f"📂 数据集文件: {dataset_path} ")

    if not dataset_path.exists():
        print(f"❌ 数据文件不存在: {dataset_path} ")
        print("请确保已运行数据提取命令创建样本数据集")
        # Fail loudly instead of returning None: the declared return type is a
        # dataset, and a None result only surfaces later as an unrelated error.
        raise FileNotFoundError(f"数据文件不存在: {dataset_path}")

    # Load the samples, then label the dataset for this example run.
    dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
    dataset.name = "real_preprocessed_html_test"
    dataset.description = "基于真实数据的预处理HTML功能测试"
    return dataset
31+
32+
def load_extractor(model_path: str):
    """Create the "dripper" extractor configured with ``model_path``.

    Args:
        model_path: Value passed to the factory under the ``"model_path"``
            key of the extractor config.

    Returns:
        Whatever ``ExtractorFactory.create`` returns for the "dripper"
        extractor.
    """
    extractor_config = {"model_path": model_path}
    return ExtractorFactory.create("dripper", config=extractor_config)
36+
37+
def save_results(result_file: Path, results: list[dict]):
    """Write ``results`` to ``result_file`` in JSON Lines format.

    Each item is serialised with ``json.dumps(..., ensure_ascii=False)`` and
    written on its own line. An existing file is overwritten; an empty
    ``results`` produces an empty file.

    Args:
        result_file: Destination path (a ``Path`` or a plain string).
        results: JSON-serialisable records, one per output line.
    """
    result_file = Path(result_file)
    with result_file.open("w", encoding="utf-8") as f:
        for record in results:
            # Terminate every record with a bare newline so each line of the
            # file is exactly one JSON document and nothing else.
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
42+
43+
44+
def demo_llm_webkit_with_preprocessed_html_evaluation(
    model_path: str,
    dataset_path: str = "data/WebMainBench_llm-webkit_v1_WebMainBench_1827_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl",
):
    """Run the preprocessed-HTML evaluation demo end to end.

    Steps: configure logging, load the dataset, create the extractor, run
    ``MainHTMLEvaluator.evaluate`` over all samples, print the overall metrics
    and timing statistics, then write three files under ``results/``: the
    per-sample results as JSONL, the detailed evaluation results as JSON, and
    a summary report as CSV.

    Args:
        model_path: Model path forwarded to ``load_extractor``.
        dataset_path: JSONL dataset to evaluate; defaults to the dataset file
            this example has always used.

    Raises:
        FileNotFoundError: If the dataset loader yields no dataset.
    """
    print("\n === LLM-WebKit 预处理HTML功能演示 ===\n ")

    # Configure logging.
    setup_logging(level="INFO")

    # 1. Load the dataset that contains the preprocessed HTML.
    print("1. 从真实数据集加载预处理HTML数据...")
    dataset = load_benchdata(dataset_path)
    if dataset is None:
        # Stop here with a clear error instead of failing on len(None) below.
        raise FileNotFoundError(f"数据文件不存在: {dataset_path}")
    print(f"✅ 真实数据集加载成功,包含 {len(dataset)} 个样本")

    # 2. Create the extractor.
    print("2. 创建预处理HTML模式的LLM-WebKit抽取器...")
    extractor = load_extractor(model_path)
    print("✅ 抽取器创建成功")
    print("📋 配置信息:")
    print(" - 跳过LLM推理: 是(直接处理预处理HTML)")
    print()

    # 3. Run the evaluation over every sample (max_samples=None).
    print("4. 开始评测...")
    print("=" * 50)
    evaluator = MainHTMLEvaluator()
    result = evaluator.evaluate(
        dataset=dataset,
        extractor=extractor,
        max_samples=None,
    )

    # 4. Show the evaluation results.
    print("\n 5. 📊 预处理HTML模式评测结果:")
    print("=" * 50)
    results_dict = result.to_dict()
    metrics = results_dict.get('overall_metrics', {})

    # Overall metrics.
    print("\n 🏆 综合指标:")
    for key, value in metrics.items():
        print(f" {key} : {value:.4f} ")

    # Timing statistics, computed over successful extractions only.
    print("\n ⚡ 性能统计:")
    sample_results = results_dict.get('sample_results', [])
    if sample_results:
        succeeded = [s for s in sample_results if s.get('extraction_success')]
        extraction_times = [s.get('extraction_time', 0) for s in succeeded]
        if extraction_times:
            avg_time = sum(extraction_times) / len(extraction_times)
            print(f" 平均提取时间: {avg_time:.3f} 秒")
            # Missing timings default to 0, so the average can be 0; skip the
            # throughput line rather than divide by zero.
            if avg_time > 0:
                print(f" 处理速度: {1 / avg_time:.1f} 样本/秒")

        success_count = len(succeeded)
        print(f" 成功样本数: {success_count} /{len(dataset)} ")

    # 5. Save the results.
    print("\n 6. 💾 保存评测结果...")
    results_dir = Path("results")
    results_dir.mkdir(exist_ok=True)

    # Per-sample results (dataset enriched with extraction output) as JSONL.
    jsonl_dataset_path = results_dir / f"{extractor.name}_preprocessed_html_dataset_with_results.jsonl"
    save_results(jsonl_dataset_path, result.sample_results)
    print(f"✅ 结果已保存到: {jsonl_dataset_path} ")
    print(f"✅ 带抽取结果的JSONL数据集已保存到: {jsonl_dataset_path} ")

    # Detailed results (JSON) and summary report (CSV).
    results_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_results.json"
    report_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_report.csv"
    DataSaver.save_evaluation_results(result, results_path)
    DataSaver.save_summary_report(result, report_path)

    print(f"✅ 详细结果已保存到: {results_path} ")
    print(f"✅ CSV报告已保存到: {report_path} ")
127+
128+
129+
if __name__ == "__main__":
    # Script entry point: parse --model_path and run the demo.
    import argparse
    import traceback

    parser = argparse.ArgumentParser(description="WebMainBench 基本使用示例")
    parser.add_argument("--model_path", required=True, help="LLM model路径")
    args = parser.parse_args()
    try:
        demo_llm_webkit_with_preprocessed_html_evaluation(args.model_path)
    except Exception as e:
        print(f"\n ❌ 运行出错: {e} ")
        traceback.print_exc()
        # Exit non-zero so shells and CI can tell the run failed.
        raise SystemExit(1)
    else:
        print("\n ✅ 示例运行完成!")
0 commit comments