Patch for LesothoTimesParser.V1 in src/fundus/publishers/ls/lesotho_times.py.

Changes carried by this patch:
  * The paragraph, subheadline, summary and author XPath selectors now match
    the content container with contains(@class,'entry-content') instead of an
    exact @class='entry-content' comparison.
  * title removes the text "- Lesotho Times" from the og:title meta value,
    strips surrounding whitespace, and returns None when og:title is missing
    or empty.
  * images also accepts <img> children of a div whose class contains
    'post-image', and passes a caption_selector that looks up a figcaption
    under the nearest ancestor div whose class contains 'media'.

NOTE(review): this patch was recovered from a copy in which all line breaks
and indentation had been flattened. Indentation below is inferred from the
Python nesting. The first hunk header declares 14 lines per side, but only 11
could be recovered; three (presumably blank) context lines are missing and
their positions are unknown. Regenerate the patch against the repository
before applying it.

diff --git a/src/fundus/publishers/ls/lesotho_times.py b/src/fundus/publishers/ls/lesotho_times.py
index 8aff3bc2f..aa584ca04 100644
--- a/src/fundus/publishers/ls/lesotho_times.py
+++ b/src/fundus/publishers/ls/lesotho_times.py
@@ -15,14 +15,14 @@ class LesothoTimesParser(ParserProxy):
     class V1(BaseParser):
-        _paragraph_selector = XPath("//div[@class='entry-content']/p[text() or span]")
+        _paragraph_selector = XPath("//div[contains(@class,'entry-content')]/p[text() or span]")
         _subheadline_selector = XPath(
-            "//div[@class='entry-content']/p[not(text() or em) and strong[not(em)] and position()>4]"
+            "//div[contains(@class,'entry-content')]/p[not(text() or em) and strong[not(em)] and position()>4]"
         )
-        _summary_selector = XPath("//div[@class='entry-content']/p[not(text()) and (strong[em] or em)]")
+        _summary_selector = XPath("//div[contains(@class,'entry-content')]/p[not(text()) and (strong[em] or em)]")
         _author_selector = XPath(
-            "//div[@class='entry-content']/p[not(text() or em) and strong[not(em)] and position()<5]"
+            "//div[contains(@class,'entry-content')]/p[not(text() or em) and strong[not(em)] and position()<5]"
         )
 
         @attribute
@@ -44,13 +44,16 @@ def authors(self) -> List[str]:
 
         @attribute
         def title(self) -> Optional[str]:
-            return self.precomputed.meta.get("og:title")
+            if title := self.precomputed.meta.get("og:title"):
+                return title.replace("- Lesotho Times", "").strip()
+            return None
 
         @attribute
         def images(self) -> List[Image]:
             return image_extraction(
                 doc=self.precomputed.doc,
                 paragraph_selector=self._paragraph_selector,
-                image_selector=XPath("//div[@class='feature-postimg']/img"),
+                image_selector=XPath("//div[@class='feature-postimg' or contains(@class, 'post-image')]/img"),
+                caption_selector=XPath("./ancestor::div[contains(@class,'media')]//figcaption"),
                 upper_boundary_selector=XPath("//header"),
             )