33
44import email .utils
55import logging
6+ import re
67from datetime import datetime , timedelta , timezone
78from typing import TYPE_CHECKING
89
@@ -173,12 +174,19 @@ def _generate_sensor_entry(
173174 if key in ["published" , "updated" , "created" , "expired" ]:
174175 parsed_date : datetime = self ._parse_date (value )
175176 sensor_entry [key ] = parsed_date .strftime (self ._date_format )
177+ elif key == "image" :
178+ sensor_entry ["image" ] = value .get ("href" )
176179 else :
177180 sensor_entry [key ] = value
178181
179- self ._process_image (feed_entry , sensor_entry )
180-
181- _LOGGER .debug ("Feed %s: Generated sensor entry: %s" , self .name , sensor_entry )
182+ if "image" in self ._inclusions and "image" not in sensor_entry :
183+ sensor_entry ["image" ] = self ._process_image (feed_entry )
184+ if (
185+ "link" in self ._inclusions
186+ and "link" not in sensor_entry
187+ and (processed_link := self ._process_link (feed_entry ))
188+ ):
189+ sensor_entry ["link" ] = processed_link
182190 return sensor_entry
183191
184192 def _parse_date (self : FeedParserSensor , date : str ) -> datetime :
@@ -194,42 +202,63 @@ def _parse_date(self: FeedParserSensor, date: str) -> datetime:
194202 self .name ,
195203 date ,
196204 )
205+ # best effort to parse the date using dateutil
206+ parsed_time = parser .parse (date )
207+
208+ if not parsed_time .tzinfo :
209+ # best effort to parse the date using dateutil
197210 parsed_time = parser .parse (date )
198- if not parsed_time .tzname () :
199- # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
200- parsed_time = parsed_time . replace (
201- tzinfo = timezone ( parsed_time . utcoffset ()), # type: ignore[arg-type]
211+ if not parsed_time .tzinfo :
212+ msg = (
213+ f"Feed { self . name } : Unable to parse date { date } , "
214+ "caused by an incorrect date format"
202215 )
216+ raise ValueError (msg )
217+ if not parsed_time .tzname ():
218+ # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
219+ parsed_time = parsed_time .replace (
220+ tzinfo = timezone (parsed_time .utcoffset ()), # type: ignore[arg-type]
221+ )
222+
203223 if self ._local_time :
204224 parsed_time = dt .as_local (parsed_time )
205225 _LOGGER .debug ("Feed %s: Parsed date: %s" , self .name , parsed_time )
206226 return parsed_time
207227
208- def _process_image (
209- self : FeedParserSensor ,
210- feed_entry : FeedParserDict ,
211- sensor_entry : dict [str , str ],
212- ) -> None :
213- if "image" in self ._inclusions and "image" not in sensor_entry .keys ():
214- if "enclosures" in feed_entry :
215- images = [
216- enc
217- for enc in feed_entry ["enclosures" ]
218- if enc .type .startswith ("image/" )
219- ]
220- else :
221- images = []
228+ def _process_image (self : FeedParserSensor , feed_entry : FeedParserDict ) -> str :
229+ if "enclosures" in feed_entry and feed_entry ["enclosures" ]:
230+ images = [
231+ enc for enc in feed_entry ["enclosures" ] if enc .type .startswith ("image/" )
232+ ]
222233 if images :
223- sensor_entry ["image" ] = images [0 ]["href" ] # pick the first image found
224- else :
225- _LOGGER .debug (
226- "Feed %s: Image is in inclusions, but no image was found for %s" ,
234+ # pick the first image found
235+ return images [0 ]["href" ]
236+ elif "summary" in feed_entry :
237+ images = re .findall (
238+ r"<img.+?src=\"(.+?)\".+?>" ,
239+ feed_entry ["summary" ],
240+ )
241+ if images :
242+ # pick the first image found
243+ return images [0 ]
244+ _LOGGER .debug (
245+ "Feed %s: Image is in inclusions, but no image was found for %s" ,
246+ self .name ,
247+ feed_entry ,
248+ )
249+ return DEFAULT_THUMBNAIL # use default image if no image found
250+
251+ def _process_link (self : FeedParserSensor , feed_entry : FeedParserDict ) -> str :
252+ """Return link from feed entry."""
253+ if "links" in feed_entry :
254+ if len (feed_entry ["links" ]) > 1 :
255+ _LOGGER .warning (
256+ "Feed %s: More than one link found for %s. Using the first link." ,
227257 self .name ,
228258 feed_entry ,
229259 )
230- sensor_entry [
231- "image"
232- ] = DEFAULT_THUMBNAIL # use default image if no image found
260+ return feed_entry ["links" ][0 ]["href" ]
261+ return ""
233262
234263 @property
235264 def feed_entries (self : FeedParserSensor ) -> list [dict [str , str ]]:
0 commit comments