diff --git a/examples/basic_usage.py b/examples/basic_usage.py index 5fc6dd5..e257867 100755 --- a/examples/basic_usage.py +++ b/examples/basic_usage.py @@ -290,7 +290,7 @@ def quicksort(arr): return dataset -def demo_basic_evaluation(): +def demo_basic_mock_evaluation(): """演示基本评测流程""" print("=== WebMainBench 基本使用示例 ===\n") @@ -382,12 +382,12 @@ def _extract_content(self, html, url=None): results_dir = Path("results") results_dir.mkdir(exist_ok=True) - results_path = results_dir / "evaluation_results.json" + results_path = results_dir / "mock_evaluation_results.json" DataSaver.save_evaluation_results(result, results_path) print(f"\n结果已保存到: {results_path}") # 10. 生成报告 - report_path = results_dir / "evaluation_report.csv" + report_path = results_dir / "mock_evaluation_report.csv" DataSaver.save_summary_report(result, report_path) print(f"报告已保存到: {report_path}") @@ -701,8 +701,8 @@ def hello_world(): if __name__ == "__main__": try: - demo_basic_evaluation() - # demo_llm_webkit_evaluation() # 使用新的LLM-WebKit评测示例 + demo_basic_mock_evaluation() + demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例 print("\n✅ 示例运行完成!") except Exception as e: diff --git a/results/llm_webkit_evaluation_results.json b/results/llm_webkit_evaluation_results.json index 9839d38..41fbf81 100644 --- a/results/llm_webkit_evaluation_results.json +++ b/results/llm_webkit_evaluation_results.json @@ -2,7 +2,7 @@ "metadata": { "dataset_name": "llm_webkit_test", "extractor_name": "llm-webkit", - "timestamp": "2025-07-31T13:52:12.948959", + "timestamp": "2025-07-31T14:59:59.169188", "total_samples": 3 }, "overall_metrics": { @@ -17,7 +17,7 @@ { "sample_id": "text_code_sample", "extraction_success": true, - "extraction_time": 3.6406631469726562, + "extraction_time": 3.614135980606079, "metrics": { "code_edit": { "score": 0.488, @@ -113,7 +113,7 @@ { "sample_id": "table_sample", "extraction_success": true, - "extraction_time": 1.6590700149536133, + "extraction_time": 1.6187489032745361, "metrics": { "code_edit": { "score": 1.0, @@ -209,7 +209,7 @@ { "sample_id": "formula_sample", "extraction_success": true, - "extraction_time": 1.5354089736938477, + "extraction_time": 1.5252501964569092, "metrics": { "code_edit": { "score": 1.0, diff --git a/results/evaluation_report.csv b/results/mock_evaluation_report.csv similarity index 100% rename from results/evaluation_report.csv rename to results/mock_evaluation_report.csv diff --git a/results/evaluation_results.json b/results/mock_evaluation_results.json similarity index 98% rename from results/evaluation_results.json rename to results/mock_evaluation_results.json index 7b603f6..c697851 100644 --- a/results/evaluation_results.json +++ b/results/mock_evaluation_results.json @@ -2,7 +2,7 @@ "metadata": { "dataset_name": "sample_dataset", "extractor_name": "mock", - "timestamp": "2025-07-31T14:29:43.477342", + "timestamp": "2025-07-31T14:59:49.917729", "total_samples": 2 }, "overall_metrics": { @@ -17,7 +17,7 @@ { "sample_id": "sample-001-programming-tutorial", "extraction_success": true, - "extraction_time": 4.0531158447265625e-06, + "extraction_time": 7.3909759521484375e-06, "metrics": { "code_edit": { "score": 1.0, diff --git a/webmainbench/extractors/jina_extractor.py b/webmainbench/extractors/jina_extractor.py index 15bad4d..8577115 100644 --- a/webmainbench/extractors/jina_extractor.py +++ b/webmainbench/extractors/jina_extractor.py @@ -79,7 +79,7 @@ def _extract_content(self, html: str, url: str = None) -> ExtractionResult: return ExtractionResult( content=content, - content_list=content_list, + # content_list=content_list, title=title, # confidence_score=self._calculate_confidence(content, content_list), success=True diff --git a/webmainbench/extractors/llm_webkit_extractor.py b/webmainbench/extractors/llm_webkit_extractor.py index b49df9b..f2c2b5c 100644 --- a/webmainbench/extractors/llm_webkit_extractor.py +++ b/webmainbench/extractors/llm_webkit_extractor.py @@ -661,7 +661,7 @@ def _extract_content(self, html: str, url: str = None) -> ExtractionResult: # 创建结果对象 result = ExtractionResult( content=main_content, - content_list=content_list, + # content_list=content_list, title=self._extract_title(html), language=self._detect_language(main_content), confidence_score=confidence,