Skip to content

Commit 716b827

Browse files
authored
Merge pull request #794 from flairNLP/recover-from-malformed-xml
Handle malformed XML
2 parents (e39b108 + 3f1cb64), commit 716b827

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

src/fundus/scraping/url.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ class Sitemap(URLSource):
164164
_decompressor: ClassVar[_ArchiveDecompressor] = _ArchiveDecompressor()
165165
_sitemap_selector: ClassVar[XPath] = XPath("//*[local-name()='sitemap']/*[local-name()='loc']")
166166
_url_selector: ClassVar[XPath] = XPath("//*[local-name()='url']/*[local-name()='loc']")
167-
_parser = XMLParser(strip_cdata=False)
167+
_parser = XMLParser(strip_cdata=False, recover=True)
168168

169169
def __iter__(self) -> Iterator[str]:
170170
def yield_recursive(sitemap_url: str) -> Iterator[str]:

0 commit comments

Comments
 (0)