Skip to content

Commit 13d7df2

Browse files
committed
Update core.py
1 parent c4a9378 commit 13d7df2

File tree

1 file changed

+49
-1
lines changed
  • Watcher/Watcher/threats_watcher

1 file changed

+49
-1
lines changed

Watcher/Watcher/threats_watcher/core.py

Lines changed: 49 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ def main_watch():
166166
- close_old_connections()
167167
- load_feeds()
168168
- fetch_last_posts(settings.POSTS_DEPTH)
169+
- fetch_last_posts_bluesky(settings.POSTS_DEPTH)
169170
- tokenize_count_urls()
170171
- remove_banned_words()
171172
- focus_five_letters()
@@ -178,6 +179,7 @@ def main_watch():
178179
load_feeds()
179180
logger.info("Loaded feeds.")
180181
fetch_last_posts(settings.POSTS_DEPTH)
182+
fetch_last_posts_bluesky(settings.POSTS_DEPTH)
181183
logger.info("Fetched last posts.")
182184
tokenize_count_urls()
183185
logger.info("Tokenized words.")
@@ -216,7 +218,7 @@ def load_feeds():
216218

217219
def fetch_last_posts(nb_max_post):
218220
"""
219-
Fetch the nb last posts for each feed.
221+
Fetch the nb last posts for each feed (non-Bluesky) .
220222
221223
:param nb_max_post: The deepness of the search on each feed.
222224
"""
@@ -226,6 +228,8 @@ def fetch_last_posts(nb_max_post):
226228
tmp_posts = dict()
227229
posts_published = dict()
228230
for url in rss_urls:
231+
if "bsky.app" in url:
232+
continue
229233
try:
230234
feed_content = requests.get(url, timeout=10, headers=HEADERS)
231235
if feed_content.status_code == 200:
@@ -240,7 +244,51 @@ def fetch_last_posts(nb_max_post):
240244
for entry in feed.entries:
241245
if count <= nb_max_post:
242246
count += 1
247+
dt = "no-date"
248+
parsed = entry.get('published_parsed') or entry.get('updated_parsed')
249+
if parsed:
250+
try:
251+
dt = datetime.fromtimestamp(calendar.timegm(parsed))
252+
except Exception:
253+
dt = "no-date"
254+
link = entry.get('link') or entry.get('guid') or entry.get('id') or None
255+
title_raw = entry.get('title') or entry.get('summary') or entry.get('description') or (entry.get('guid') if isinstance(entry.get('guid'), str) else None) or link or ""
256+
title_clean = re.sub(r'<[^>]+>', '', title_raw).replace(u'\xa0', u' ').strip()
257+
if link and title_clean:
258+
tmp_posts[title_clean] = link
259+
posts_published[link] = dt
260+
for title, url in tmp_posts.items():
261+
posts[title] = url
243262

263+
264+
def fetch_last_posts_bluesky(nb_max_post):
265+
"""
266+
Fetch the nb last posts for each Bluesky feed (domain 'bsky.app').
267+
268+
:param nb_max_post: The deepness of the search on each feed.
269+
"""
270+
global posts
271+
global posts_published
272+
posts = dict()
273+
tmp_posts = dict()
274+
posts_published = dict()
275+
for url in rss_urls:
276+
if "bsky.app" not in url:
277+
continue
278+
try:
279+
feed_content = requests.get(url, timeout=10)
280+
if feed_content.status_code == 200:
281+
feeds.append(feedparser.parse(feed_content.text))
282+
else:
283+
logger.warning(f"Feed: {url} => Error: Status code: {feed_content.status_code}")
284+
except requests.exceptions.RequestException as e:
285+
logger.error(str(e))
286+
287+
for feed in feeds:
288+
count = 1
289+
for entry in feed.entries:
290+
if count <= nb_max_post:
291+
count += 1
244292
dt = "no-date"
245293
parsed = entry.get('published_parsed') or entry.get('updated_parsed')
246294
if parsed:

0 commit comments

Comments
 (0)