Skip to content

Commit 730c8a9

Browse files
authored
Merge pull request #51 from 1041206149/chonggou
将LLM api 配置放到config.py中
2 parents fb0514a + e45c82a commit 730c8a9

File tree

5 files changed

+86
-13
lines changed

5 files changed

+86
-13
lines changed

examples/multi_extractor_compare.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,15 @@
11
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
22
from pathlib import Path
33

4-
# 全局LLM配置
5-
LLM_CONFIG = {
6-
'llm_base_url': '',
7-
'llm_api_key': '',
8-
'llm_model': '',
9-
'use_llm': True
10-
}
4+
# 如需调用LLM修正抽取结果,在 webmainbench/config.py 中配置 LLM api
115

126
def all_extractor_comparison():
137
"""演示多抽取器对比"""
148

159
print("\n=== 多抽取器对比演示 ===\n")
1610

1711
# 创建数据集
18-
dataset_path = Path("../data/test_math.jsonl")
12+
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
1913
dataset = DataLoader.load_jsonl(dataset_path)
2014

2115
# 创建webkit抽取器

webmainbench/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""
2+
全局配置文件
3+
"""
4+
5+
import os

# LLM settings used to correct the output of the extraction tools.
#
# Each value may be supplied through an environment variable so that API
# credentials do not have to be written into this tracked file. When a
# variable is unset, the literal fallback on the same line is used; edit
# that fallback to configure the value directly in this file instead.
LLM_CONFIG = {
    # Base URL of an OpenAI-compatible endpoint, e.g. 'https://api.deepseek.com'.
    'llm_base_url': os.environ.get('WEBMAINBENCH_LLM_BASE_URL', ''),
    # API key for that endpoint.
    'llm_api_key': os.environ.get('WEBMAINBENCH_LLM_API_KEY', ''),
    # Model name sent with each chat-completion request.
    'llm_model': os.environ.get('WEBMAINBENCH_LLM_MODEL', 'deepseek-chat'),
    # Master switch for LLM-based correction of extraction results.
    'use_llm': True,
}

webmainbench/evaluator/evaluator.py

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from dataclasses import dataclass
66
from typing import Dict, Any, List, Optional, Union, Iterator
7-
import time
7+
import time, sys
88
import itertools
99
from datetime import datetime
1010
from pathlib import Path
@@ -85,10 +85,78 @@ def __init__(self, metric_config: Dict[str, Any] = None):
8585
Args:
8686
metric_config: Configuration for metrics
8787
"""
88+
89+
self._validate_llm_config()
90+
8891
self.metric_calculator = MetricCalculator(metric_config)
8992
self.metric_config = metric_config or {}
90-
91-
def evaluate(self,
93+
94+
def _validate_llm_config(self):
    """Validate the LLM settings from ``webmainbench/config.py``.

    Behaviour depends on ``LLM_CONFIG['use_llm']``:

    * Falsy or missing: print a notice that LLM enhancement is disabled,
      sleep for five seconds, then return.
    * Truthy: require non-empty ``llm_base_url`` and ``llm_api_key``, then
      send one minimal chat-completion request through the ``openai``
      client to confirm that the endpoint, key and model are usable.

    All diagnostics are printed to standard output.

    Raises:
        SystemExit: With exit code 1 when a required setting is missing,
            when the ``openai`` package cannot be imported, or when the
            test request raises any exception.
    """
    from ..config import LLM_CONFIG

    banner = "=" * 60
    divider = "-" * 60

    # LLM disabled: inform the user, pause, and continue without LLM support.
    if not LLM_CONFIG.get('use_llm', False):
        print("\n" + banner)
        print("⚠️ 注意:当前未启用LLM增强提取效果功能")
        print(" 如需启用LLM增强提取效果,请在 webmainbench/config.py 中配置:")
        print(" - 设置 use_llm = True")
        print(" - 填写 llm_base_url")
        print(" - 填写 llm_api_key")
        print(banner)
        print(" (5秒后使用基础方案进行对比...)")
        time.sleep(5)
        print()
        return

    # Completeness check: both the endpoint and the key must be non-empty.
    if not LLM_CONFIG.get('llm_base_url') or not LLM_CONFIG.get('llm_api_key'):
        print("\n" + banner)
        print("❌ 错误:LLM配置不完整!")
        print(divider)
        print("当前 use_llm = True,但缺少必要的API配置。")
        print("\n请在 webmainbench/config.py 中完成以下配置:")
        print(" 1. llm_base_url (例如: 'https://api.deepseek.com')")
        print(" 2. llm_api_key (例如: 'sk-xxxxxxxxxxxx')")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print(banner + "\n")
        sys.exit(1)

    # Import the client separately so that a missing dependency is reported
    # as such instead of being presented as an invalid API configuration.
    try:
        from openai import OpenAI
    except ImportError as e:
        print("\n" + banner)
        print("❌ 错误:未安装 openai 依赖包!")
        print(divider)
        print(f"导入失败原因: {str(e)}")
        print("\n请先执行 pip install openai 安装依赖。")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print(banner + "\n")
        sys.exit(1)

    # Liveness check: one tiny request exercises the URL, key and model.
    print("正在验证LLM API配置...")
    try:
        client = OpenAI(
            base_url=LLM_CONFIG.get('llm_base_url'),
            api_key=LLM_CONFIG.get('llm_api_key'),
        )
        # Only success or failure matters; the reply itself is discarded.
        client.chat.completions.create(
            model=LLM_CONFIG.get('llm_model', 'deepseek-chat'),
            messages=[{"role": "user", "content": "test"}],
            max_tokens=5,
            temperature=0,
        )
    except Exception as e:
        # Deliberately broad: any failure here is fatal and is reported to
        # the user before exiting.
        print("\n" + banner)
        print("❌ 错误:LLM API配置无效!")
        print(divider)
        print(f"验证失败原因: {str(e)}")
        print("\n请检查 webmainbench/config.py 中的配置:")
        print(" 1. llm_base_url 是否正确")
        print(" 2. llm_api_key 是否有效")
        print(" 3. llm_model 是否支持")
        print(" 4. 网络连接是否正常")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print(banner + "\n")
        sys.exit(1)

    print("✅ LLM API配置验证成功!\n使用 基础方案➕LLM增强提取效果 进行评测。")
158+
159+
def evaluate(self,
92160
dataset: BenchmarkDataset,
93161
extractor: Union[BaseExtractor, str],
94162
extractor_config: Dict[str, Any] = None,

webmainbench/metrics/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
197197
return {'code': '', 'formula': '', 'table': '', 'text': ''}
198198

199199
# 加载 llm 配置
200-
from examples.multi_extractor_compare import LLM_CONFIG
200+
from ..config import LLM_CONFIG
201+
201202
# 直接创建具体的提取器实例
202203
from .code_extractor import CodeSplitter
203204
from .formula_extractor import FormulaSplitter

webmainbench/metrics/base_content_splitter.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ def should_use_llm(self, field_name: str) -> bool:
5252

5353
# 默认逻辑:对groundtruth内容不使用LLM,对其他内容使用
5454
if field_name == "groundtruth_content":
55-
print(f"[DEBUG] 检测到groundtruth内容,不使用LLM")
5655
return False
5756
return True
5857

0 commit comments

Comments
 (0)