Skip to content

Commit 33d7743

Browse files
committed
fix(web): handle metadata extraction failures - Add proper object to dict conversion for metadata - Add fallback for metadata extraction errors - Improve error handling and logging
1 parent 4e0a2b5 commit 33d7743

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

agentic_rag/web_processor.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,21 @@ def process_url(self, url: str) -> List[Dict[str, Any]]:
5555

5656
# Extract text and metadata
5757
text = extract(downloaded, include_comments=False, include_tables=False)
58-
metadata = extract_metadata(downloaded)
58+
try:
59+
metadata = extract_metadata(downloaded)
60+
# Convert metadata to dict if it's not already
61+
if not isinstance(metadata, dict):
62+
metadata = {
63+
'title': getattr(metadata, 'title', ''),
64+
'author': getattr(metadata, 'author', ''),
65+
'date': getattr(metadata, 'date', ''),
66+
'sitename': getattr(metadata, 'sitename', ''),
67+
'categories': getattr(metadata, 'categories', []),
68+
'tags': getattr(metadata, 'tags', [])
69+
}
70+
except Exception as e:
71+
print(f"Warning: Metadata extraction failed: {str(e)}")
72+
metadata = {}
5973

6074
if not text:
6175
raise ValueError(f"No text content extracted from URL: {url}")

0 commit comments

Comments
 (0)