Skip to content

Commit 5384a3e

Browse files
authored
feat(cache): send an ETag header with rendered pages for better caching capabilities (#499)
The ETag is a sha256 hash over the fully rendered page. In contrast to Last-Modified, this value is stable between different runs of the server. It only changes when there is an actual content change.
1 parent 1484045 commit 5384a3e

2 files changed

Lines changed: 31 additions & 7 deletions

File tree

truewiki/views/page.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import aiohttp
12
import click
3+
import hashlib
24
import os
35
import time
46

@@ -32,7 +34,7 @@ def _view(wiki_page, user, page: str) -> web.Response:
3234
return wrap_page(page, "Page", variables, templates)
3335

3436

35-
def view(user, page: str, if_modified_since) -> web.Response:
37+
def view(user, page: str, if_modified_since, if_none_match) -> web.Response:
3638
if page.endswith("/"):
3739
page += "Main Page"
3840

@@ -68,16 +70,22 @@ def view(user, page: str, if_modified_since) -> web.Response:
6870
if can_cache and namespaced_page in metadata.LAST_TIME_RENDERED:
6971
if (
7072
if_modified_since is not None
71-
and metadata.LAST_TIME_RENDERED[namespaced_page] <= if_modified_since.timestamp()
73+
and metadata.LAST_TIME_RENDERED[namespaced_page][0] <= if_modified_since.timestamp()
7274
):
7375
# We already rendered this page before. If the browser has it in his
7476
# cache, he can simply reuse that if we haven't rendered since.
7577
response = web.HTTPNotModified()
78+
elif (
79+
not user and if_none_match is not None and metadata.LAST_TIME_RENDERED[namespaced_page][1] == if_none_match
80+
):
81+
# We already rendered this page before. If the browser has it in his
82+
# cache, he can simply reuse that if the content is still the same.
83+
response = web.HTTPNotModified()
7684
elif (
7785
not user
7886
and cache_filename
7987
and os.path.exists(cache_filename)
80-
and os.path.getmtime(cache_filename) >= metadata.LAST_TIME_RENDERED[namespaced_page]
88+
and os.path.getmtime(cache_filename) >= metadata.LAST_TIME_RENDERED[namespaced_page][0]
8189
):
8290
# We already rendered this page to disk. Serve from there.
8391
with open(cache_filename) as fp:
@@ -105,13 +113,28 @@ def view(user, page: str, if_modified_since) -> web.Response:
105113
# Only update the time if we don't have one yet. This makes sure
106114
# that LAST_TIME_RENDERED has the oldest timestamp possible.
107115
if namespaced_page not in metadata.LAST_TIME_RENDERED:
108-
metadata.LAST_TIME_RENDERED[namespaced_page] = page_time
116+
metadata.LAST_TIME_RENDERED[namespaced_page] = (page_time, None)
109117

110-
response = web.Response(body=body, content_type="text/html", status=status_code)
118+
# Update the ETag if we don't have one yet. We only generate ETags for anonymous users.
119+
if not user and metadata.LAST_TIME_RENDERED[namespaced_page][1] is None:
120+
etag = hashlib.sha256(body.encode("utf-8")).hexdigest()
121+
metadata.LAST_TIME_RENDERED[namespaced_page] = (metadata.LAST_TIME_RENDERED[namespaced_page][0], etag)
122+
123+
if if_none_match is not None and etag == if_none_match:
124+
# Now we rendered the page, we find out that the etag did match after all.
125+
# Return this information to the client, instead of the payload.
126+
response = web.HTTPNotModified()
127+
128+
if response is None:
129+
response = web.Response(body=body, content_type="text/html", status=status_code)
111130

112131
# Inform the browser under which rules it can cache this page.
113132
if can_cache:
114-
response.last_modified = metadata.LAST_TIME_RENDERED[namespaced_page]
133+
response.last_modified = metadata.LAST_TIME_RENDERED[namespaced_page][0]
134+
if not user:
135+
# ETags are weak, as we don't actually know if we are byte-for-byte the same because
136+
# of things like gzip compression.
137+
response.etag = aiohttp.ETag(metadata.LAST_TIME_RENDERED[namespaced_page][1], is_weak=True)
115138
response.headers["Vary"] = "Accept-Encoding, Cookie"
116139
response.headers["Cache-Control"] = "private, must-revalidate, max-age=0"
117140
return response

truewiki/web_routes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ async def html_page(request):
242242
_validate_page(page)
243243

244244
if_modified_since = request.if_modified_since
245-
return view_page.view(user, page, if_modified_since)
245+
if_none_match = request.if_none_match[0].value if request.if_none_match else None
246+
return view_page.view(user, page, if_modified_since, if_none_match)
246247

247248

248249
@routes.route("*", "/{tail:.*}")

0 commit comments

Comments
 (0)