Skip to content

Commit 1ca8123

Browse files
committed
将LLM api 配置放到config.py中
1 parent 00a4b62 commit 1ca8123

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Global configuration file."""

# LLM settings used to correct the results produced by the extraction tools.
# Fill in the endpoint and key here before enabling LLM-based correction;
# avoid committing real credentials to version control.
LLM_CONFIG = {
    # Base URL of the LLM API endpoint (empty by default; must be supplied).
    'llm_base_url': '',
    # API key for the LLM service (empty by default; must be supplied).
    'llm_api_key': '',
    # Name of the model to request.
    'llm_model': 'deepseek-chat',
    # Switch for the LLM correction step.
    # NOTE(review): how consumers interpret this flag is not visible here — confirm.
    'use_llm': True,
}

examples/multi_extractor_compare.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,15 @@
11
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
22
from pathlib import Path
33

4-
# 全局LLM配置
5-
LLM_CONFIG = {
6-
'llm_base_url': '',
7-
'llm_api_key': '',
8-
'llm_model': '',
9-
'use_llm': True
10-
}
4+
# 如需调用LLM修正抽取结果,在config.py中配置 LLM api
115

126
def all_extractor_comparison():
137
"""演示多抽取器对比"""
148

159
print("\n=== 多抽取器对比演示 ===\n")
1610

1711
# 创建数据集
18-
dataset_path = Path("../data/test_math.jsonl")
12+
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
1913
dataset = DataLoader.load_jsonl(dataset_path)
2014

2115
# 创建webkit抽取器
@@ -35,7 +29,7 @@ def all_extractor_comparison():
3529
# 运行对比
3630
evaluator = Evaluator()
3731
extractors = [webkit_extractor, magic_extractor, trafilatura_extractor, resiliparse_extractor]
38-
# extractors = [webkit_extractor]
32+
extractors = [webkit_extractor]
3933

4034

4135
results = evaluator.compare_extractors(

webmainbench/metrics/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
197197
return {'code': '', 'formula': '', 'table': '', 'text': ''}
198198

199199
# 加载 llm 配置
200-
from examples.multi_extractor_compare import LLM_CONFIG
200+
from config import LLM_CONFIG
201201
# 直接创建具体的提取器实例
202202
from .code_extractor import CodeSplitter
203203
from .formula_extractor import FormulaSplitter

0 commit comments

Comments
 (0)