33
44import email .utils
55import logging
6+ import re
67from datetime import datetime , timedelta , timezone
78from typing import TYPE_CHECKING
89
@@ -173,11 +174,19 @@ def _generate_sensor_entry(
173174 if key in ["published" , "updated" , "created" , "expired" ]:
174175 parsed_date : datetime = self ._parse_date (value )
175176 sensor_entry [key ] = parsed_date .strftime (self ._date_format )
177+ elif key == "image" :
178+ sensor_entry ["image" ] = value .get ("href" )
176179 else :
177180 sensor_entry [key ] = value
178181
179- self ._process_image (feed_entry , sensor_entry )
180-
182+ if "image" in self ._inclusions and "image" not in sensor_entry :
183+ sensor_entry ["image" ] = self ._process_image (feed_entry )
184+ if (
185+ "link" in self ._inclusions
186+ and "link" not in sensor_entry
187+ and (processed_link := self ._process_link (feed_entry ))
188+ ):
189+ sensor_entry ["link" ] = processed_link
181190 _LOGGER .debug ("Feed %s: Generated sensor entry: %s" , self .name , sensor_entry )
182191 return sensor_entry
183192
@@ -194,42 +203,63 @@ def _parse_date(self: FeedParserSensor, date: str) -> datetime:
194203 self .name ,
195204 date ,
196205 )
206+ # best effort to parse the date using dateutil
207+ parsed_time = parser .parse (date )
208+
209+ if not parsed_time .tzinfo :
210+ # best effort to parse the date using dateutil
197211 parsed_time = parser .parse (date )
198- if not parsed_time .tzname () :
199- # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
200- parsed_time = parsed_time . replace (
201- tzinfo = timezone ( parsed_time . utcoffset ()), # type: ignore[arg-type]
212+ if not parsed_time .tzinfo :
213+ msg = (
214+ f"Feed { self . name } : Unable to parse date { date } , "
215+ "caused by an incorrect date format"
202216 )
217+ raise ValueError (msg )
218+ if not parsed_time .tzname ():
219+ # replace tzinfo with UTC offset if tzinfo does not contain a TZ name
220+ parsed_time = parsed_time .replace (
221+ tzinfo = timezone (parsed_time .utcoffset ()), # type: ignore[arg-type]
222+ )
223+
203224 if self ._local_time :
204225 parsed_time = dt .as_local (parsed_time )
205226 _LOGGER .debug ("Feed %s: Parsed date: %s" , self .name , parsed_time )
206227 return parsed_time
207228
def _process_image(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the URL of the first image found for a feed entry.

    Lookup order:

    1. The first enclosure whose MIME type starts with ``image/``.
    2. The ``src`` of the first ``<img>`` tag in the entry summary.
    3. ``DEFAULT_THUMBNAIL`` when neither of the above yields an image.

    The summary is searched whenever no image enclosure was found, including
    when the entry carries only non-image enclosures (e.g. audio files).
    """
    enclosures = feed_entry["enclosures"] if "enclosures" in feed_entry else []
    for enclosure in enclosures or []:
        # An enclosure without a MIME type cannot be identified as an image,
        # and one without an href has nothing to return; skip both.
        mime_type = enclosure.get("type") or ""
        href = enclosure.get("href")
        if mime_type.startswith("image/") and href:
            return href

    summary = feed_entry["summary"] if "summary" in feed_entry else ""
    if summary:
        # Accept single- or double-quoted src values and do not require any
        # further attributes after src (so a bare <img src="..."> matches).
        # Requiring whitespace before "src" avoids matching e.g. "data-src".
        found = re.search(
            r"<img\b[^>]*?\ssrc\s*=\s*([\"'])(?P<url>.+?)\1",
            summary,
            re.IGNORECASE,
        )
        if found:
            return found.group("url")

    _LOGGER.debug(
        "Feed %s: Image is in inclusions, but no image was found for %s",
        self.name,
        feed_entry,
    )
    return DEFAULT_THUMBNAIL  # use default image if no image found
251+
def _process_link(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the link of a feed entry.

    The ``href`` of the first item in ``feed_entry["links"]`` is returned.
    A warning is logged when the entry carries more than one link. An empty
    string is returned when the entry has no links, or when the first link
    has no ``href``.
    """
    links = feed_entry["links"] if "links" in feed_entry else []
    if not links:
        # Covers both a missing "links" key and an empty list, which would
        # otherwise raise IndexError on the [0] lookup below.
        return ""
    if len(links) > 1:
        _LOGGER.warning(
            "Feed %s: More than one link found for %s. Using the first link.",
            self.name,
            feed_entry,
        )
    # A link without an href is treated the same as "no link".
    return links[0].get("href") or ""
233263
234264 @property
235265 def feed_entries (self : FeedParserSensor ) -> list [dict [str , str ]]:
0 commit comments