Skip to content

Commit 34d09af

Browse files
committed
move parser
1 parent 2ed6323 commit 34d09af

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/fundus/scraping/url.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,7 @@ class Sitemap(URLSource):
164 164
_decompressor: ClassVar[_ArchiveDecompressor] = _ArchiveDecompressor()
165 165
_sitemap_selector: ClassVar[XPath] = XPath("//*[local-name()='sitemap']/*[local-name()='loc']")
166 166
_url_selector: ClassVar[XPath] = XPath("//*[local-name()='url']/*[local-name()='loc']")
167+
_parser = XMLParser(strip_cdata=False)
167 168

168 169
def __iter__(self) -> Iterator[str]:
169 170
def yield_recursive(sitemap_url: str) -> Iterator[str]:
@@ -193,8 +194,7 @@ def yield_recursive(sitemap_url: str) -> Iterator[str]:
193 194
if not content:
194 195
logger.warning(f"Warning! Empty sitemap at {sitemap_url!r}")
195 196
return
196-
parser = XMLParser(strip_cdata=False)
197-
tree = lxml.etree.fromstring(content, parser=parser)
197+
tree = lxml.etree.fromstring(content, parser=self._parser)
198 198
urls = [node.text for node in self._url_selector(tree)]
199 199
if urls:
200 200
for new_url in reversed(urls) if self.reverse else urls:

0 commit comments

Comments
 (0)