
Commit 8b1ac92

Refactored RSS feed handling by adding constants.py and test_handler.py
- Introduced constants.py to store newsfeed URLs and related configuration.
- Updated handler.py for improved flexibility in handling additional RSS feeds.
- Completed issues #22 and #23 by streamlining feed management.
1 parent 83afc3b commit 8b1ac92

4 files changed: +84 -13 lines changed
Lines changed: 2 additions & 1 deletion
@@ -1,3 +1,4 @@
 boto3
 feedparser
-pytz
+pytz
+unittest
constants.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+import os
+
+TABLE_ARN = os.environ.get("DYNAMODB_TABLE_ARN")
+ARTIFACT_TYPE = "newsletter"
+FEEDS = [
+    {"name": "Bleeping Computer", "url": "https://www.bleepingcomputer.com/feed/"},
+    {"name": "The Hacker News", "url": "https://feeds.feedburner.com/TheHackersNews"},
+    {"name": "CNBC Technology",
+     "url": "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=15839069"},
+    {"name": "TechCrunch", "url": "https://techcrunch.com/feed"},
+]
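
This list is what gives the handler its flexibility: registering another source is a one-line addition to FEEDS, with no handler changes needed, since the handler simply iterates the list. A minimal sketch under that assumption (the Krebs on Security entry below is illustrative only, not part of this commit):

```python
# constants.py (sketch): each feed needs only a display name and an RSS URL.
FEEDS = [
    {"name": "Bleeping Computer", "url": "https://www.bleepingcomputer.com/feed/"},
    {"name": "The Hacker News", "url": "https://feeds.feedburner.com/TheHackersNews"},
    # Hypothetical extra source -- one line is all a new feed requires.
    {"name": "Krebs on Security", "url": "https://krebsonsecurity.com/feed/"},
]
```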

lambdas/security-newsletter/src/handler.py

Lines changed: 44 additions & 12 deletions
@@ -1,18 +1,47 @@
-import os
 import logging
 from datetime import datetime, timedelta
-
 import boto3
 import feedparser
 import pytz
-
-
-TABLE_ARN = os.environ["DYNAMODB_TABLE_ARN"]
-ARTIFACT_TYPE = "newsletter"
+from constants import ARTIFACT_TYPE, TABLE_ARN, FEEDS
 
 # Logging Configuration
 logging.getLogger().setLevel(logging.INFO)
 
+class NewsFeedFetcher:
+    def __init__(self, feed_name, feed_url):
+        """
+        Initialize the RSS feed fetcher with a name and feed URL.
+        :param feed_name: A descriptive name for the feed (e.g., "Bleeping Computer")
+        :param feed_url: The RSS feed URL to fetch articles from
+        """
+        self.feed_name = feed_name
+        self.feed_url = feed_url
+
+    def fetch_articles(self):
+        """
+        Fetch articles from the specified RSS feed.
+        Returns a list of dictionaries containing the articles.
+        """
+        feed = feedparser.parse(self.feed_url)
+        if feed.bozo:
+            raise ValueError(f"Error parsing feed '{self.feed_name}': {feed.bozo_exception}")
+
+        articles = []
+        for entry in feed.entries:
+            articles.append(
+                {
+                    "title": entry.title,
+                    "link": entry.link,
+                    "published": entry.get("published", "N/A"),
+                    "summary": entry.get("summary", "N/A"),
+                }
+            )
+        return articles
+
+    def __repr__(self):
+        return f"NewsFeedFetcher(feed_name='{self.feed_name}', feed_url='{self.feed_url}')"
+
 
 def main(event, _):
     """
@@ -21,12 +50,15 @@ def main(event, _):
     """
     logging.info("Event: %s", event)
 
-    # Fetch articles from both feeds
-    bleeping_articles = fetch_bleeping_computer_rss()
-    hacker_articles = fetch_hacker_news_rss()
-
-    # Combine articles from both feeds
-    all_articles = bleeping_articles + hacker_articles
+    all_articles = []
+    for feed_info in FEEDS:
+        fetcher = NewsFeedFetcher(feed_info["name"], feed_info["url"])
+        try:
+            articles = fetcher.fetch_articles()
+            logging.info(f"Fetched {len(articles)} articles from {feed_info['name']}.")
+            all_articles.extend(articles)
+        except ValueError as e:
+            logging.error(f"Error fetching articles from {feed_info['name']}: {e}")
 
     # Get today's articles from the combined list
     latest_articles = get_latest_article_with_timezone(all_articles)
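
Outside the Lambda entry point, NewsFeedFetcher can also be exercised on its own, which is useful for spot-checking a single feed. A rough sketch, assuming handler.py and constants.py are importable from the working directory (the article dictionaries carry the keys built in fetch_articles):

```python
# Ad-hoc check of the first configured feed; requires network access.
from constants import FEEDS
from handler import NewsFeedFetcher

fetcher = NewsFeedFetcher(FEEDS[0]["name"], FEEDS[0]["url"])
try:
    for article in fetcher.fetch_articles()[:3]:
        # Each entry is a dict with "title", "link", "published", and "summary".
        print(article["published"], "-", article["title"])
except ValueError as exc:
    # fetch_articles raises ValueError when feedparser sets the bozo flag.
    print(f"Feed error: {exc}")
```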
test_handler.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+import unittest
+from handler import NewsFeedFetcher, get_latest_article_with_timezone
+from constants import FEEDS
+
+class TestNewsFeedFetcher(unittest.TestCase):
+    def test_fetch_and_parse_dates(self):
+        """
+        Test the functionality of fetching feeds.
+        """
+
+        all_articles = []
+        for feed_info in FEEDS:
+            fetcher = NewsFeedFetcher(feed_info["name"], feed_info["url"])
+            try:
+                articles = fetcher.fetch_articles()
+                all_articles.extend(articles)
+            except ValueError as e:
+                self.fail(f"Error with feed '{feed_info['name']}': {e}")
+
+        latest_articles = get_latest_article_with_timezone(all_articles)
+        print(f"Total articles fetched: {len(all_articles)}")
+        print(f"Today's articles: {len(latest_articles)}")
+
+        self.assertGreater(len(latest_articles), 0, "No articles were fetched.")
+
+if __name__ == "__main__":
+    unittest.main()
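
Note that this test fetches the live feeds over the network rather than mocking feedparser, so it needs outbound HTTP access; with handler.py and constants.py on the import path it can be run with `python -m unittest test_handler`.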
