Skip to content

Commit 30e2650

Browse files
committed
fix: llm-webkit extraction_success status capture
1 parent fa4c55b commit 30e2650

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

webmainbench/extractors/llm_webkit_extractor.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -587,9 +587,9 @@ def _generate_main_html_with_parser(self, original_html: str, classification_res
587587

588588
def _extract_content_from_main_html(self, main_html: str, url: str = None) -> tuple:
589589
"""使用llm-webkit的方法将main_html提取成content"""
590+
import traceback
590591
try:
591592
from llm_web_kit.simple import extract_html_to_md
592-
import traceback
593593

594594
print(f"🔧 开始使用llm-webkit简单接口提取content...")
595595

@@ -604,9 +604,9 @@ def _extract_content_from_main_html(self, main_html: str, url: str = None) -> tu
604604
except Exception as e:
605605
print(f"❌ llm-webkit提取失败: {e}")
606606
print(f"❌ 错误详情: {traceback.format_exc()}")
607-
return "", []
607+
raise RuntimeError(f"llm-webkit提取失败: {str(e)}") from e
608+
608609

609-
610610
def _extract_content(self, html: str, url: str = None) -> ExtractionResult:
611611
"""
612612
使用高级LLM推理提取内容.
@@ -721,9 +721,11 @@ def _extract_content(self, html: str, url: str = None) -> ExtractionResult:
721721

722722
except Exception as e:
723723
extraction_time = time.time() - start_time
724+
import traceback
724725
return ExtractionResult.create_error_result(
725726
f"LLM-WebKit extraction failed: {str(e)}",
726-
extraction_time=extraction_time
727+
traceback.format_exc(),
728+
extraction_time
727729
)
728730

729731
def _extract_title(self, html: str) -> Optional[str]:

0 commit comments

Comments
 (0)