Skip to content

Commit 19f58c7

Browse files
committed
feat: Respect Cache-Control: max-age header
This change implements support for the `Cache-Control: max-age` HTTP header to avoid re-fetching feeds whose cached copy has not yet expired. The `Feed` class now stores `last_checked` and `max_age` attributes. Before fetching a feed, the application checks whether the cached version is still valid based on these attributes. If the cache is still fresh, the network request is skipped and a synthetic 304 (Not Modified) result is returned instead. After a successful fetch (any response other than 304), the `Cache-Control` header is parsed from the response, and the `last_checked` and `max_age` attributes are updated; `max_age` is cleared when the header or its `max-age` directive is absent. This ensures that subsequent fetches will respect the cache duration specified by the feed provider. This commit also adds a test case to verify that the `Cache-Control: max-age` header is respected. The test starts a web server that serves a feed with a `max-age` value, runs the feed to populate the cache, and then checks that the feed is not fetched again before `max-age` expires, and is fetched again after it expires. Fixes: rss2email#286
1 parent 074ca93 commit 19f58c7

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

rss2email/feed.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ class Feed (object):
171171
'etag',
172172
'modified',
173173
'seen',
174+
'max_age',
175+
'last_checked',
174176
]
175177

176178
## saved/loaded from ConfigParser instance
@@ -352,6 +354,8 @@ def reset(self):
352354
self.etag = None
353355
self.modified = None
354356
self.seen = {} # type: Dict[str, Dict[str, Any]]
357+
self.max_age = None
358+
self.last_checked = None
355359

356360
def _set_name(self, name):
357361
if not self._name_regexp.match(name):
@@ -369,6 +373,13 @@ def _fetch(self):
369373
>>> parsed.status
370374
200
371375
"""
376+
if (self.max_age is not None and self.last_checked is not None and
377+
_time.time() < self.last_checked + self.max_age):
378+
_LOG.info('skipping {}: cache has not expired'.format(self.name))
379+
parsed = _feedparser.FeedParserDict()
380+
parsed['status'] = 304
381+
return parsed
382+
372383
_LOG.info('fetch {}'.format(self))
373384
if not self.url:
374385
raise _error.InvalidFeedConfig(setting='url', feed=self)
@@ -937,6 +948,17 @@ def run(self, send=True, clean=False):
937948
self.modified = None
938949
parsed = self._fetch()
939950

951+
if parsed.status != 304:
952+
self.last_checked = _time.time()
953+
if 'cache-control' in parsed.headers:
954+
match = _re.search(r'max-age=(\d+)', parsed.headers['cache-control'])
955+
if match:
956+
self.max_age = int(match.group(1))
957+
else:
958+
self.max_age = None
959+
else:
960+
self.max_age = None
961+
940962
if clean and len(parsed.entries) > 0:
941963
for guid in self.seen:
942964
self.seen[guid]['old'] = True

test/test.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,24 @@ def webserver_for_test_if_fetch(queue, timeout):
238238
finally:
239239
httpd.server_close()
240240

241+
def webserver_for_test_cache_control(queue, max_age):
    """Serve the disqus test feed with a ``Cache-Control: max-age`` header.

    Binds an HTTP server to an OS-assigned port and publishes that port
    through *queue*.  It then handles exactly one request for every
    message read from *queue*, until the message ``"stop"`` arrives.
    The listening socket is closed on the way out, even on error.

    queue   -- queue used to publish the port and to receive commands
    max_age -- value substituted into the ``max-age=`` directive
    """
    # The directive is the same for every request, so format it once.
    cache_control = 'max-age={}'.format(max_age)

    class CacheControlHandler(NoLogHandler):
        def do_GET(self):
            # Every GET gets a 200 with the caching header, whatever the path.
            self.send_response(200)
            self.send_header('Cache-Control', cache_control)
            self.end_headers()
            feed_path = _os.path.join(test_dir, 'disqus/feed.rss')
            with open(feed_path, 'rb') as feed_file:
                body = feed_file.read()
            self.wfile.write(body)

    server = http.server.HTTPServer(('', 0), CacheControlHandler)
    try:
        # Port 0 lets the OS pick a free port; report the one actually bound.
        queue.put(server.server_address[1])
        # Any message other than "stop" means "serve one more request".
        for _command in iter(queue.get, "stop"):
            server.handle_request()
    finally:
        server.server_close()
258+
241259
class TestFetch(unittest.TestCase):
242260
"Retrieving feeds from servers"
243261
def test_delay(self):
@@ -368,6 +386,39 @@ def test_only_new(self):
368386
self.assertIn("seen", content["feeds"][0])
369387
self.assertEqual(queue.get(), "done")
370388

389+
def test_cache_control(self):
390+
"Respects Cache-Control: max-age header"
391+
max_age = 5
392+
cfg = """[DEFAULT]
393+
394+
395+
queue = multiprocessing.Queue()
396+
webserver_proc = multiprocessing.Process(target=webserver_for_test_cache_control, args=(queue, max_age))
397+
webserver_proc.start()
398+
port = queue.get()
399+
400+
with ExecContext(cfg) as ctx:
401+
ctx.call("add", 'test', 'http://127.0.0.1:{port}/disqus/feed.rss'.format(port = port))
402+
403+
# First run, should fetch
404+
queue.put("next")
405+
p = ctx.call("run", "--no-send")
406+
self.assertIn("fetch", p.stderr)
407+
408+
# Second run, should be cached
409+
p = ctx.call("run", "--no-send")
410+
self.assertIn("cache has not expired", p.stderr)
411+
412+
# Wait for cache to expire
413+
time.sleep(max_age + 1)
414+
415+
# Third run, should fetch again
416+
queue.put("next")
417+
p = ctx.call("run", "--no-send")
418+
self.assertIn("fetch", p.stderr)
419+
420+
queue.put("stop")
421+
371422

372423
def webserver_for_test_send(queue):
373424
httpd = http.server.HTTPServer(('', 0), NoLogHandler)

0 commit comments

Comments
 (0)