Skip to content

Commit 779a235

Browse files
committed
refine image extractor
1 parent c4f8eb2 commit 779a235

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

src/fundus/publishers/de/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,8 @@
3939
from .rn import RuhrNachrichtenParser
4040
from .spon import SPONParser
4141
from .sportschau import SportSchauParser
42-
from .stuttgarter_zeitung import StuttgarterZeitungParser
4342
from .stern import SternParser
43+
from .stuttgarter_zeitung import StuttgarterZeitungParser
4444
from .sz import SZParser
4545
from .tagesschau import TagesschauParser
4646
from .tagesspiegel import TagesspiegelParser

src/fundus/publishers/de/stuttgarter_zeitung.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import datetime
22
from typing import List, Optional
33

4+
from lxml.cssselect import CSSSelector
5+
from lxml.etree import XPath
6+
47
from fundus.parser import ArticleBody, BaseParser, Image, ParserProxy, attribute
58
from fundus.parser.utility import (
69
extract_article_body_with_selector,
@@ -9,8 +12,6 @@
912
generic_topic_parsing,
1013
image_extraction,
1114
)
12-
from lxml.cssselect import CSSSelector
13-
from lxml.etree import XPath
1415

1516

1617
class StuttgarterZeitungParser(ParserProxy):
@@ -19,7 +20,7 @@ class V1(BaseParser):
1920
_subheadline_selector = CSSSelector("div.article-body h2")
2021

2122
@attribute
22-
def body(self) -> ArticleBody:
23+
def body(self) -> Optional[ArticleBody]:
2324
return extract_article_body_with_selector(
2425
self.precomputed.doc,
2526
paragraph_selector=self._paragraph_selector,
@@ -47,7 +48,7 @@ def images(self) -> List[Image]:
4748
return image_extraction(
4849
doc=self.precomputed.doc,
4950
paragraph_selector=self._paragraph_selector,
50-
image_selector=XPath("//figure//img"),
51+
image_selector=XPath("//figure//picture//img"),
5152
caption_selector=XPath("./ancestor::figure//figcaption"),
5253
relative_urls=True,
5354
)

0 commit comments

Comments
 (0)