@@ -108,6 +108,7 @@ def __init__(
108108 self .__check = True
109109 self .bug_reporter = bug_reporter if bug_reporter else None
110110 self .debug = False
111+ self .logger = logging .getLogger ('RSSBot' )
111112
112113 if debug :
113114 Handlers .add_debuging_handlers (self )
@@ -144,7 +145,7 @@ def __init__(
144145
145146 def log_bug (self , exc :Exception , msg = '' , report = True , disable_notification = False ,** args ):
146147 info = BugReporter .exception (msg , exc , report = self .bug_reporter and report )
147- logging .exception (msg , exc_info = exc )
148+ self . logger .exception (msg , exc_info = exc )
148149 msg = html .escape (msg )
149150 escaped_info = {k :html .escape (str (v )) for k ,v in info .items ()}
150151 message = (
@@ -200,7 +201,9 @@ def purge(self, html, images=True) -> Soup:
200201
@retry(10)
def get_feeds(self):
    """Download the feed source and return it as text.

    The URL is read from ``self.feed_configs['source']``; the response
    body is decoded as UTF-8.  The call is wrapped by ``retry(10)``.

    Returns:
        str: the decoded response body.
    """
    self.logger.info('Getting feeds')
    with urlopen(self.feed_configs['source']) as response:
        page = response.read().decode('utf-8')
    # Report success only once the body has really been read and decoded;
    # logging right after the connection opens would claim success even
    # when the read or the decode fails afterwards.
    self.logger.info('Got feeds')
    return page
205208
206209 def summarize (self , soup :Soup , max_length , read_more ):
@@ -250,6 +253,7 @@ def read_feed(self, index=0):
250253
251254 soup_page = Soup (feeds_page , self .feed_configs .get ('feed-format' , 'xml' ))
252255 feeds_list = soup_page .select (self .feed_configs ['feeds-selector' ])
256+ self .logger .info (f'Got { len (feeds_list )} feeds' )
253257 title , link , content , time = None , None , None , None
254258 for feed in feeds_list [index :]:
255259 try :
@@ -282,12 +286,15 @@ def read_feed(self, index=0):
282286 continue
283287
284288 time_selector = self .feed_configs ['time-selector' ]
285- if time_selector :
286- # date-selector could be None (null)
287- if self .feed_configs ['time-attribute' ]:
288- time = str (feed .select_one (time_selector ).attrs [self .feed_configs ['time-attribute' ]])
289- else :
290- time = str (feed .select_one (time_selector ).text )
289+ # date-selector could not be None (null)
290+ if self .feed_configs ['time-attribute' ]:
291+ time = str (feed .select_one (time_selector ).attrs [self .feed_configs ['time-attribute' ]])
292+ else :
293+ time = str (feed .select_one (time_selector ).text )
294+
295+ if time is None :
296+ self .logger .error ('The feed does not have a date, which means that the "date-selector" is not configured correctly' )
297+ self .logger .info ('The feed was\n ' + str (feed ))
291298
292299 content_selector = self .feed_configs ['content-selector' ]
293300 if content_selector :
@@ -310,6 +317,7 @@ def read_feed(self, index=0):
310317
311318 def render_feed (self , feed : dict , header : str ):
312319 title = feed ['title' ]
320+ self .logger .debug (f'Rendering feed { title } ' )
313321 post_link = feed ['link' ]
314322 content = feed ['content' ]
315323 messages = [{
@@ -333,6 +341,7 @@ def render_feed(self, feed: dict, header: str):
333341 content = self .purge (content )
334342 images = content .find_all ('img' )
335343 first = True
344+ self .logger .debug (f'Found { len (images )} images' )
336345
337346 if not len (images ):
338347 content , overflow = self .summarize (content , self .MAX_MSG_LEN , self .get_string ('read-more' ))
@@ -452,20 +461,20 @@ def iter_all_chats(self):
452461
def check_new_feed(self):
    """Send feeds newer than the stored 'last-feed-date', then re-arm the timer.

    Feeds from ``self.read_feed()`` are walked in order and the loop stops
    at the first one that is not newer than the stored date, so the feeds
    are presumably ordered newest first (verify against ``read_feed``).

    * A feed without a date cannot be compared: it is sent once and the
      loop stops.
    * When no 'last-feed-date' is stored yet (first run) nothing is sent;
      the date of the first dated feed is recorded as the baseline.
    * Otherwise every feed dated after the stored value is rendered and
      sent to all chats.

    The newest date seen is written back under 'last-feed-date'.  If
    ``self.__check`` is true, another run is scheduled after
    ``self.interval`` seconds.
    """
    last_date = self.get_data('last-feed-date', DB=self.data_db)
    new_date = last_date
    for feed in self.read_feed():
        raw_date = feed['date']
        date = parse_date(raw_date) if raw_date else None

        # Track the newest date seen.  Guard both operands: comparing a
        # datetime with None raises TypeError, which would abort this run
        # before the timer below is re-armed.
        if date is not None and (new_date is None or new_date < date):
            new_date = date

        if date is None or (last_date is not None and last_date < date):
            self.logger.info(f'Sending new feed. date: {date}')
            messages = self.render_feed(feed, header=self.get_string('new-feed'))
            self.send_feed(messages, self.iter_all_chats())

        # Stop on an undated feed (only the first one is sent), on a first
        # run (no baseline to compare with) or once an old feed is reached.
        if date is None or last_date is None or date <= last_date:
            self.logger.info('No more new feeds')
            break

    self.set_data('last-feed-date', new_date, DB=self.data_db)
    if self.__check:
        self.logger.info(f'Checking for new feeds in {self.interval} seconds')
        self.check_thread = Timer(self.interval, self.check_new_feed)
        self.check_thread.start()
471480
0 commit comments