Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions docs/supported_publishers.md
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,28 @@
<td>&#160;</td>
<td>&#160;</td>
</tr>
<tr>
<td>
<code>TOnline</code>
</td>
<td>
<div>T-Online</div>
</td>
<td>
<a href="https://www.t-online.de/">
<span>www.t-online.de</span>
</a>
</td>
<td>
<code>de</code>
</td>
<td>
<code>images</code>
<code>topics</code>
</td>
<td>&#160;</td>
<td>&#160;</td>
</tr>
<tr>
<td>
<code>Tagesschau</code>
Expand Down
11 changes: 10 additions & 1 deletion src/fundus/publishers/de/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from .wdr import WDRParser
from .winfuture import WinfutureParser
from .zdf import ZDFParser

from .t_online import TOnlineParser

# noinspection PyPep8Naming
class DE(metaclass=PublisherGroup):
Expand Down Expand Up @@ -595,3 +595,12 @@ class DE(metaclass=PublisherGroup):
Sitemap("https://www.gamestar.de/artikel_archiv_index.xml"),
],
)

TOnline = Publisher(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As soon as you're done implementing this parser, please follow these steps to generate the test files. Make sure that the test article includes topics, images, and subheadlines.

name="T-Online",
domain="https://www.t-online.de/",
parser=TOnlineParser,
sources=[
Sitemap("https://www.t-online.de/sitemap.xml"),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

],
)
39 changes: 39 additions & 0 deletions src/fundus/publishers/de/t_online.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import datetime
from typing import List, Optional

from lxml.cssselect import CSSSelector

from fundus.parser import ArticleBody, BaseParser, ParserProxy, attribute
from fundus.parser.utility import (
extract_article_body_with_selector,
generic_author_parsing,
generic_date_parsing,
)


class TOnlineParser(ParserProxy):
class V1(BaseParser):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are missing the images and topics attributes.

_paragraph_selector = CSSSelector("div[class*='px-24'] > p.text-18")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to rework all the selectors; they do not seem to work.

_summary_selector = CSSSelector("p.font-bold.text-18")
_subheadline_selector = CSSSelector("h3, h2")

@attribute
def body(self) -> Optional[ArticleBody]:
    """Build the article body from the precomputed document.

    Hands the document and this parser's summary, subheadline and
    paragraph selectors to ``extract_article_body_with_selector`` and
    returns whatever that helper produces.
    """
    document = self.precomputed.doc
    summary = self._summary_selector
    subheadline = self._subheadline_selector
    paragraph = self._paragraph_selector
    return extract_article_body_with_selector(
        document,
        summary_selector=summary,
        subheadline_selector=subheadline,
        paragraph_selector=paragraph,
    )

@attribute
def publishing_date(self) -> Optional[datetime.datetime]:
    """Return the publishing date parsed from the ``datePublished`` entry.

    The raw value is looked up with ``self.precomputed.ld.bf_search`` and
    converted by ``generic_date_parsing``.
    """
    raw_date = self.precomputed.ld.bf_search("datePublished")
    return generic_date_parsing(raw_date)

@attribute
def authors(self) -> List[str]:
    """Return the author names parsed from the ``author`` entry.

    The raw value is looked up with ``self.precomputed.ld.bf_search`` and
    normalized by ``generic_author_parsing``.
    """
    raw_authors = self.precomputed.ld.bf_search("author")
    return generic_author_parsing(raw_authors)

@attribute
def title(self) -> Optional[str]:
    """Return the ``headline`` entry found by ``self.precomputed.ld.bf_search``."""
    headline = self.precomputed.ld.bf_search("headline")
    return headline