11import datetime
22from typing import List , Optional
33
4+ from lxml .cssselect import CSSSelector
5+ from lxml .etree import XPath
6+
47from fundus .parser import ArticleBody , BaseParser , Image , ParserProxy , attribute
58from fundus .parser .utility import (
69 extract_article_body_with_selector ,
912 generic_topic_parsing ,
1013 image_extraction ,
1114)
12- from lxml .cssselect import CSSSelector
13- from lxml .etree import XPath
1415
1516
1617class StuttgarterZeitungParser (ParserProxy ):
@@ -19,7 +20,7 @@ class V1(BaseParser):
1920 _subheadline_selector = CSSSelector ("div.article-body h2" )
2021
2122 @attribute
22- def body (self ) -> ArticleBody :
23+ def body (self ) -> Optional [ ArticleBody ] :
2324 return extract_article_body_with_selector (
2425 self .precomputed .doc ,
2526 paragraph_selector = self ._paragraph_selector ,
@@ -47,7 +48,7 @@ def images(self) -> List[Image]:
4748 return image_extraction (
4849 doc = self .precomputed .doc ,
4950 paragraph_selector = self ._paragraph_selector ,
50- image_selector = XPath ("//figure//img" ),
51+ image_selector = XPath ("//figure//picture// img" ),
5152 caption_selector = XPath ("./ancestor::figure//figcaption" ),
5253 relative_urls = True ,
5354 )
0 commit comments