@@ -166,6 +166,7 @@ def main_watch():
166166 - close_old_connections()
167167 - load_feeds()
168168 - fetch_last_posts(settings.POSTS_DEPTH)
169+ - fetch_last_posts_bluesky(settings.POSTS_DEPTH)
169170 - tokenize_count_urls()
170171 - remove_banned_words()
171172 - focus_five_letters()
@@ -178,6 +179,7 @@ def main_watch():
178179 load_feeds ()
179180 logger .info ("Loaded feeds." )
180181 fetch_last_posts (settings .POSTS_DEPTH )
182+ fetch_last_posts_bluesky (settings .POSTS_DEPTH )
181183 logger .info ("Fetched last posts." )
182184 tokenize_count_urls ()
183185 logger .info ("Tokenized words." )
@@ -216,7 +218,7 @@ def load_feeds():
216218
217219def fetch_last_posts (nb_max_post ):
218220 """
219- Fetch the nb last posts for each feed.
221+ Fetch the nb last posts for each feed (non-Bluesky) .
220222
221223 :param nb_max_post: The deepness of the search on each feed.
222224 """
@@ -226,6 +228,8 @@ def fetch_last_posts(nb_max_post):
226228 tmp_posts = dict ()
227229 posts_published = dict ()
228230 for url in rss_urls :
231+ if "bsky.app" in url :
232+ continue
229233 try :
230234 feed_content = requests .get (url , timeout = 10 , headers = HEADERS )
231235 if feed_content .status_code == 200 :
@@ -240,7 +244,51 @@ def fetch_last_posts(nb_max_post):
240244 for entry in feed .entries :
241245 if count <= nb_max_post :
242246 count += 1
247+ dt = "no-date"
248+ parsed = entry .get ('published_parsed' ) or entry .get ('updated_parsed' )
249+ if parsed :
250+ try :
251+ dt = datetime .fromtimestamp (calendar .timegm (parsed ))
252+ except Exception :
253+ dt = "no-date"
254+ link = entry .get ('link' ) or entry .get ('guid' ) or entry .get ('id' ) or None
255+ title_raw = entry .get ('title' ) or entry .get ('summary' ) or entry .get ('description' ) or (entry .get ('guid' ) if isinstance (entry .get ('guid' ), str ) else None ) or link or ""
256+ title_clean = re .sub (r'<[^>]+>' , '' , title_raw ).replace (u'\xa0 ' , u' ' ).strip ()
257+ if link and title_clean :
258+ tmp_posts [title_clean ] = link
259+ posts_published [link ] = dt
260+ for title , url in tmp_posts .items ():
261+ posts [title ] = url
243262
263+
264+ def fetch_last_posts_bluesky (nb_max_post ):
265+ """
266+ Fetch the nb last posts for each Bluesky feed (domain 'bsky.app').
267+
268+ :param nb_max_post: The deepness of the search on each feed.
269+ """
270+ global posts
271+ global posts_published
272+ posts = dict ()
273+ tmp_posts = dict ()
274+ posts_published = dict ()
275+ for url in rss_urls :
276+ if "bsky.app" not in url :
277+ continue
278+ try :
279+ feed_content = requests .get (url , timeout = 10 )
280+ if feed_content .status_code == 200 :
281+ feeds .append (feedparser .parse (feed_content .text ))
282+ else :
283+ logger .warning (f"Feed: { url } => Error: Status code: { feed_content .status_code } " )
284+ except requests .exceptions .RequestException as e :
285+ logger .error (str (e ))
286+
287+ for feed in feeds :
288+ count = 1
289+ for entry in feed .entries :
290+ if count <= nb_max_post :
291+ count += 1
244292 dt = "no-date"
245293 parsed = entry .get ('published_parsed' ) or entry .get ('updated_parsed' )
246294 if parsed :
0 commit comments