@@ -125,7 +125,7 @@ def __init__(
125125 # - link-selector: how to get link of source
126126 # - content-selector: how to get content
127127 # - skip-condition: how to check skip condition
128- # - format: feed/{selector}, content/{selector}, title/{regex}, none
128+ # - format: feed/{selector}, content/{selector}, title/{regex}, link/{regex}, none
129129
130130 self .__skip = lambda feed : False
131131 skip_condition = feed_configs .get ('feed-skip-condition' )
@@ -138,6 +138,9 @@ def __init__(
138138 elif self .__skip_field == 'title' :
139139 match = re .compile (skip_condition ).match
140140 self .__skip = lambda title : bool (match (title ))
141+ elif self .__skip_field == 'link' :
142+ match = re .compile (skip_condition ).match
143+ self .__skip = lambda link : bool (match (link ))
141144
142145 def log_bug (self , exc :Exception , msg = '' , report = True , disable_notification = False ,** args ):
143146 info = BugReporter .exception (msg , exc , report = self .bug_reporter and report )
@@ -247,7 +250,7 @@ def read_feed(self, index=0):
247250
248251 soup_page = Soup (feeds_page , self .feed_configs .get ('feed-format' , 'xml' ))
249252 feeds_list = soup_page .select (self .feed_configs ['feeds-selector' ])
250- title , link , content , date = None , None , None , None
253+ title , link , content , time = None , None , None , None
251254 for feed in feeds_list [index :]:
252255 try :
253256 if self .__skip_field == 'feed' :
@@ -257,17 +260,34 @@ def read_feed(self, index=0):
257260 title_selector = self .feed_configs ['title-selector' ]
258261 if title_selector :
259262 # title-selector could be None (null)
260- title = str (feed .select (title_selector )[0 ].text )
263+ if self .feed_configs ['title-attribute' ]:
264+ title = str (feed .select_one (title_selector ).attrs [self .feed_configs ['title-attribute' ]])
265+ else :
266+ title = str (feed .select_one (title_selector ).text )
261267
262268 if self .__skip_field == 'title' :
263269 if self .__skip (title ):
264270 continue
265271
266272 link_selector = self .feed_configs ['link-selector' ]
267273 if link_selector :
268- link = str (feed .select (link_selector )[0 ].text )
274+ # link-selector could be None (null)
275+ if self .feed_configs ['link-attribute' ]:
276+ link = str (feed .select_one (link_selector ).attrs [self .feed_configs ['link-attribute' ]])
277+ else :
278+ link = str (feed .select_one (link_selector ).text )
269279
270- date = str (feed .select (self .feed_configs ['date-selector' ])[0 ].text )
280+ if self .__skip_field == 'link' :
281+ if self .__skip (link ):
282+ continue
283+
284+ time_selector = self .feed_configs ['time-selector' ]
285+ if time_selector :
286+ # time-selector could be None (null)
287+ if self .feed_configs ['time-attribute' ]:
288+ time = str (feed .select_one (time_selector ).attrs [self .feed_configs ['time-attribute' ]])
289+ else :
290+ time = str (feed .select_one (time_selector ).text )
271291
272292 content_selector = self .feed_configs ['content-selector' ]
273293 if content_selector :
@@ -285,7 +305,7 @@ def read_feed(self, index=0):
285305 'title' : title ,
286306 'link' : link ,
287307 'content' : content ,
288- 'date' : date
308+ 'date' : time
289309 }
290310
291311 def render_feed (self , feed : dict , header : str ):
@@ -433,15 +453,14 @@ def iter_all_chats(self):
433453
434454 def check_new_feed (self ):
435455 last_date = self .get_data ('last-feed-date' , DB = self .data_db )
436- skip_date_check = not self .feed_configs .get ('check-date' ,True )
437456 for feed in self .read_feed ():
438457 feed_date = parse_date (feed ['date' ])
439- if not last_date or last_date < feed_date : # if last_date not exist or last feed's date is older than the new one
458+ if feed_date and ( not last_date or last_date < feed_date ) : # if feed_date is not None and last_date not exist or last feed's date is older than the new one
440459 self .set_data ('last-feed-date' , feed_date , DB = self .data_db )
441- if skip_date_check or (last_date and last_date < feed_date ):
460+ if not feed_date or (last_date and last_date < feed_date ):
442461 messages = self .render_feed (feed , header = self .get_string ('new-feed' ))
443462 self .send_feed (messages , self .iter_all_chats ())
444- if skip_date_check :
463+ if not feed_date or not last_date :
445464 break #just send last feed
446465 if self .__check :
447466 self .check_thread = Timer (self .interval , self .check_new_feed )
0 commit comments