diff --git a/docs/supported_publishers.md b/docs/supported_publishers.md index 7952efc96..3b7f1f8f3 100644 --- a/docs/supported_publishers.md +++ b/docs/supported_publishers.md @@ -1207,6 +1207,28 @@     + + + TOnline + + +
T-Online
+ + + + www.t-online.de + + + + de + + + images + topics + +   +   + Tagesschau diff --git a/src/fundus/publishers/de/__init__.py b/src/fundus/publishers/de/__init__.py index e67dbe58f..085e9e54c 100644 --- a/src/fundus/publishers/de/__init__.py +++ b/src/fundus/publishers/de/__init__.py @@ -49,7 +49,7 @@ from .wdr import WDRParser from .winfuture import WinfutureParser from .zdf import ZDFParser - +from .t_online import TOnlineParser # noinspection PyPep8Naming class DE(metaclass=PublisherGroup): @@ -595,3 +595,12 @@ class DE(metaclass=PublisherGroup): Sitemap("https://www.gamestar.de/artikel_archiv_index.xml"), ], ) + + TOnline = Publisher( + name="T-Online", + domain="https://www.t-online.de/", + parser=TOnlineParser, + sources=[ + Sitemap("https://www.t-online.de/sitemap.xml"), + ], + )
diff --git a/src/fundus/publishers/de/t_online.py b/src/fundus/publishers/de/t_online.py
new file mode 100644
index 000000000..489509c9b
--- /dev/null
+++ b/src/fundus/publishers/de/t_online.py
@@ -0,0 +1,50 @@
+import datetime
+from typing import List, Optional
+
+from lxml.cssselect import CSSSelector
+
+from fundus.parser import ArticleBody, BaseParser, ParserProxy, attribute
+from fundus.parser.utility import (
+    extract_article_body_with_selector,
+    generic_author_parsing,
+    generic_date_parsing,
+)
+
+
+class TOnlineParser(ParserProxy):
+    """Parser proxy for T-Online (www.t-online.de) articles."""
+
+    class V1(BaseParser):
+        """Parser version V1: body from ``precomputed.doc``, metadata from ``precomputed.ld``."""
+
+        # CSS selectors passed to extract_article_body_with_selector by `body`.
+        # NOTE(review): they match on utility class names (px-24, text-18, font-bold);
+        # presumably these must be revisited whenever the site's markup changes.
+        _paragraph_selector = CSSSelector("div[class*='px-24'] > p.text-18")
+        _summary_selector = CSSSelector("p.font-bold.text-18")
+        _subheadline_selector = CSSSelector("h3, h2")
+
+        @attribute
+        def body(self) -> Optional[ArticleBody]:
+            """Extract summary, subheadlines and paragraphs from the precomputed document."""
+            return extract_article_body_with_selector(
+                self.precomputed.doc,
+                summary_selector=self._summary_selector,
+                subheadline_selector=self._subheadline_selector,
+                paragraph_selector=self._paragraph_selector,
+            )
+
+        @attribute
+        def publishing_date(self) -> Optional[datetime.datetime]:
+            """Parse the ``datePublished`` entry looked up in ``precomputed.ld``."""
+            return generic_date_parsing(self.precomputed.ld.bf_search("datePublished"))
+
+        @attribute
+        def authors(self) -> List[str]:
+            """Parse the ``author`` entry looked up in ``precomputed.ld`` into a list of names."""
+            return generic_author_parsing(self.precomputed.ld.bf_search("author"))
+
+        @attribute
+        def title(self) -> Optional[str]:
+            """Return the ``headline`` entry looked up in ``precomputed.ld``, unmodified."""
+            return self.precomputed.ld.bf_search("headline")