Skip to content

Commit ef3321d

Browse files
committed
x
1 parent cddef4e commit ef3321d

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

scripts/process_dataset.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,7 @@ def process_single_item(data: dict, verbose: bool = False) -> dict:
23 23   from llm_web_kit.main_html_parser.parser.tag_mapping import MapItemToHtmlTagsParser
24 24
25 25   # 从数据中获取字段
26    - typical_raw_tag_html = data.get('llm_webkit_html', '') # 预处理HTML
27    - html = data.get('llm_webkit_html', '') # 预处理HTML
   26 + typical_raw_tag_html = data.get('typical_raw_tag_html', '') # 预处理HTML
28 27   llm_response = data.get('llm_response_html', '') # LLM响应HTML
29 28
30 29   # 检查必要字段
@@ -37,7 +36,7 @@ def process_single_item(data: dict, verbose: bool = False) -> dict:
37 36   # 构建 pre_data(参考 llm_webkit_extractor.py:665)
38 37   pre_data = {
39 38   'typical_raw_tag_html': typical_raw_tag_html,
40    - 'typical_raw_html': html,
   39 + 'typical_raw_html': typical_raw_tag_html,
41 40   'llm_response': llm_response,
42 41   'html_source': typical_raw_tag_html
43 42   }

0 commit comments

Comments
 (0)