diff --git a/docs/supported_publishers.md b/docs/supported_publishers.md
index 7952efc96..3b7f1f8f3 100644
--- a/docs/supported_publishers.md
+++ b/docs/supported_publishers.md
@@ -1207,6 +1207,28 @@
TOnline
+ de
+ images
+ topics
+ Tagesschau
diff --git a/src/fundus/publishers/de/__init__.py b/src/fundus/publishers/de/__init__.py
index e67dbe58f..085e9e54c 100644
--- a/src/fundus/publishers/de/__init__.py
+++ b/src/fundus/publishers/de/__init__.py
@@ -49,7 +49,7 @@
from .wdr import WDRParser
from .winfuture import WinfutureParser
from .zdf import ZDFParser
-
+from .t_online import TOnlineParser
# noinspection PyPep8Naming
class DE(metaclass=PublisherGroup):
@@ -595,3 +595,12 @@ class DE(metaclass=PublisherGroup):
Sitemap("https://www.gamestar.de/artikel_archiv_index.xml"),
],
)
+
+    # T-Online articles are parsed by TOnlineParser; the root sitemap.xml is the
+    # only URL source registered for this publisher.
+    TOnline = Publisher(
+        name="T-Online",
+        domain="https://www.t-online.de/",
+        parser=TOnlineParser,
+        sources=[Sitemap("https://www.t-online.de/sitemap.xml")],
+    )
diff --git a/src/fundus/publishers/de/t_online.py b/src/fundus/publishers/de/t_online.py
new file mode 100644
index 000000000..489509c9b
--- /dev/null
+++ b/src/fundus/publishers/de/t_online.py
@@ -0,0 +1,53 @@
+import datetime
+from typing import List, Optional
+
+from lxml.cssselect import CSSSelector
+
+from fundus.parser import ArticleBody, BaseParser, ParserProxy, attribute
+from fundus.parser.utility import (
+    extract_article_body_with_selector,
+    generic_author_parsing,
+    generic_date_parsing,
+)
+
+
+class TOnlineParser(ParserProxy):
+    """Parser for T-Online (https://www.t-online.de/) articles."""
+
+    class V1(BaseParser):
+        """Body comes from CSS selectors over the document; date, authors and title from ``precomputed.ld``."""
+
+        # Body paragraphs: ``p.text-18`` elements that are direct children of a
+        # ``div`` whose class attribute contains ``px-24``.
+        _paragraph_selector = CSSSelector("div[class*='px-24'] > p.text-18")
+        # Summary: ``p`` elements carrying both the ``font-bold`` and ``text-18`` classes.
+        _summary_selector = CSSSelector("p.font-bold.text-18")
+        # NOTE(review): this matches every ``h2``/``h3`` in the document, not only
+        # those inside the ``px-24`` container used for paragraphs -- confirm that
+        # no headings outside the article get picked up as subheadlines.
+        _subheadline_selector = CSSSelector("h3, h2")
+
+        @attribute
+        def body(self) -> Optional[ArticleBody]:
+            """Return the article body built from the summary, subheadline and paragraph selectors."""
+            return extract_article_body_with_selector(
+                self.precomputed.doc,
+                summary_selector=self._summary_selector,
+                subheadline_selector=self._subheadline_selector,
+                paragraph_selector=self._paragraph_selector,
+            )
+
+        @attribute
+        def publishing_date(self) -> Optional[datetime.datetime]:
+            """Return the ``datePublished`` entry of ``precomputed.ld`` run through ``generic_date_parsing``."""
+            return generic_date_parsing(self.precomputed.ld.bf_search("datePublished"))
+
+        @attribute
+        def authors(self) -> List[str]:
+            """Return the ``author`` entry of ``precomputed.ld`` run through ``generic_author_parsing``."""
+            return generic_author_parsing(self.precomputed.ld.bf_search("author"))
+
+        @attribute
+        def title(self) -> Optional[str]:
+            """Return the ``headline`` entry of ``precomputed.ld`` as found, without post-processing."""
+            return self.precomputed.ld.bf_search("headline")