Skip to content

Commit 2bac0a2

Browse files
authored
Cache added to avoid recompiling unchanged notebooks (#267)
1 parent 98f3dbb commit 2bac0a2

File tree

3 files changed

+324
-6
lines changed

3 files changed

+324
-6
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,30 @@ you can create a `main.html` file like this:
258258

259259
![Download Notebook button](https://raw.githubusercontent.com/danielfrg/mkdocs-jupyter/master/docs/download-button.png)
260260

261+
### Caching
262+
263+
By default the plugin caches notebook conversion results so that unchanged
264+
notebooks are not re-converted on every build. The cache key is based on the
265+
notebook file content and all relevant plugin config options.
266+
267+
To disable caching:
268+
269+
```yaml
270+
plugins:
271+
  - mkdocs-jupyter:
272+
      cache: false
273+
```
274+
275+
To change the cache directory (default is `.cache/mkdocs-jupyter`):
276+
277+
```yaml
278+
plugins:
279+
  - mkdocs-jupyter:
280+
      cache_dir: .cache/custom-dir
281+
```
282+
283+
Stale cache entries from previous builds are automatically cleaned up.
284+
261285
## Styles
262286

263287
This extension includes the Jupyter Lab nbconvert CSS styles and does some

src/mkdocs_jupyter/plugin.py

Lines changed: 101 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import hashlib
2+
import json
13
import logging
24
import os
35
import pathlib
@@ -54,6 +56,8 @@ class Plugin(mkdocs.plugins.BasePlugin):
5456
("toc_depth", config_options.Type(int, default=6)),
5557
("data_files", config_options.Type(dict, default={})),
5658
("custom_mathjax_url", config_options.Type(str, default="")),
59+
("cache", config_options.Type(bool, default=True)),
60+
("cache_dir", config_options.Type(str, default=".cache/mkdocs-jupyter")),
5761
)
5862
_supported_extensions = [".ipynb", ".py", ".md"]
5963

@@ -84,6 +88,9 @@ def should_include(self, file):
8488
return True
8589
return False
8690

91+
def on_pre_build(self, config):
    """Start each build with an empty record of cache files in use.

    ``on_pre_page`` adds the cache path of every page it handles to this
    set, and ``on_post_build`` deletes the cache files that are not in it.
    """
    used_paths = set()
    self._used_cache_paths = used_paths
93+
8794
def on_files(self, files, config):
8895
ret = Files(
8996
[
@@ -117,8 +124,28 @@ def on_pre_page(self, page, config, files):
117124
exec_nb = False
118125

119126
theme = self.config["theme"]
127+
cache_enabled = self.config["cache"]
128+
cache_dir = self.config["cache_dir"]
129+
130+
if cache_enabled:
131+
cache_key = _compute_cache_key(
132+
page.file.abs_src_path, self.config, exec_nb
133+
)
134+
cache_path = _get_cache_path(cache_dir, cache_key)
135+
self._used_cache_paths.add(cache_path)
136+
else:
137+
cache_path = None
120138

121139
def new_render(self, config, files):
140+
if cache_path and cache_path.exists():
141+
logger.info("Cache hit: %s", page.file.abs_src_path)
142+
cached = json.loads(cache_path.read_text(encoding="utf-8"))
143+
self.content = cached["content"]
144+
self.toc = get_toc(cached["toc_tokens"])
145+
if cached.get("title") is not None and not ignore_h1_titles:
146+
self.title = cached["title"]
147+
return
148+
122149
body = convert.nb2html(
123150
page.file.abs_src_path,
124151
execute=exec_nb,
@@ -133,11 +160,27 @@ def new_render(self, config, files):
133160
custom_mathjax_url=custom_mathjax_url,
134161
)
135162
self.content = body
136-
toc, title = get_nb_toc(page.file.abs_src_path, toc_depth)
137-
self.toc = toc
163+
toc_tokens, title = _get_nb_toc_tokens(
164+
page.file.abs_src_path, toc_depth
165+
)
166+
self.toc = get_toc(toc_tokens)
138167
if title is not None and not ignore_h1_titles:
139168
self.title = title
140169

170+
if cache_path:
171+
logger.info("Cache miss, writing: %s", page.file.abs_src_path)
172+
cache_path.parent.mkdir(parents=True, exist_ok=True)
173+
cache_path.write_text(
174+
json.dumps(
175+
{
176+
"content": body,
177+
"toc_tokens": toc_tokens,
178+
"title": title,
179+
}
180+
),
181+
encoding="utf-8",
182+
)
183+
141184
# replace render with new_render for this object only
142185
page.render = new_render.__get__(page, Page)
143186

@@ -182,22 +225,74 @@ def on_post_page(self, output_content, page, config):
182225
copyfile(data_source, data_target)
183226
logger.info("Copied data files: %s to %s", data_files, data_target_dir)
184227

228+
def on_post_build(self, config):
    """Delete cache files that no page used during this build.

    Does nothing when caching is disabled, when no usage record exists for
    this build, or when the cache directory does not exist.
    """
    if not self.config["cache"]:
        return
    # Without the record created in on_pre_build there is no way to tell
    # live entries from stale ones, so leave the cache untouched rather
    # than fail with AttributeError.
    used_paths = getattr(self, "_used_cache_paths", None)
    if used_paths is None:
        return
    cache_dir = pathlib.Path(self.config["cache_dir"])
    if not cache_dir.is_dir():
        return
    # NOTE(review): every unused *.json file directly inside cache_dir is
    # removed, so cache_dir should be a directory dedicated to this plugin.
    for cache_file in cache_dir.glob("*.json"):
        if cache_file in used_paths:
            continue
        try:
            cache_file.unlink()
        except FileNotFoundError:
            # The file vanished between glob() and unlink(); nothing left
            # to evict.
            continue
        logger.info("Evicted stale cache: %s", cache_file)
238+
185239

186240
def _get_markdown_toc(markdown_source, toc_depth):
    """Convert Markdown text and return the TOC tokens of the parser.

    ``toc_depth`` is forwarded to ``TocExtension``.
    """
    toc_extension = TocExtension(toc_depth=toc_depth)
    parser = markdown.Markdown(extensions=[toc_extension])
    # The converted output is discarded; only the parser's toc_tokens
    # attribute is of interest here.
    parser.convert(markdown_source)
    return parser.toc_tokens
190244

191245

192-
def get_nb_toc(fpath, toc_depth):
193-
"""Returns a TOC for the Notebook
194-
It does that by converting first to MD
246+
def _get_nb_toc_tokens(fpath, toc_depth):
    """Return the raw TOC tokens and the title of a notebook.

    The notebook is converted to Markdown first and the TOC tokens are
    extracted from that Markdown.

    Returns a ``(toc_tokens, title)`` tuple, where ``toc_tokens`` is a list
    of dicts and ``title`` is the name of the first level-1 token among
    them, or ``None`` when there is none.
    """
    markdown_body = convert.nb2md(fpath)
    toc_tokens = _get_markdown_toc(markdown_body, toc_depth)
    level_one_names = (
        token["name"]
        for token in toc_tokens
        if token["level"] == 1 and token["name"] is not None
    )
    title = next(level_one_names, None)
    return toc_tokens, title
259+
260+
261+
def get_nb_toc(fpath, toc_depth):
    """Return the TOC and the title for a notebook.

    The notebook is converted to Markdown first to obtain the TOC tokens,
    which are then turned into a TOC with ``get_toc``.
    """
    tokens, title = _get_nb_toc_tokens(fpath, toc_depth)
    return get_toc(tokens), title
268+
269+
270+
def _compute_cache_key(nb_path, config, exec_nb):
    """Compute a SHA-256 cache key from notebook content and config options.

    Uses the resolved exec_nb value (after execute_ignore processing) rather
    than config["execute"], so that notebooks in execute_ignore get a distinct
    cache key.

    Args:
        nb_path: Path of the notebook file; its raw bytes are hashed.
        config: Mapping that provides every option named in ``option_names``.
        exec_nb: Resolved flag telling whether the notebook is executed.

    Returns:
        The hexadecimal SHA-256 digest as a string.
    """
    option_names = (
        "kernel_name",
        "theme",
        "allow_errors",
        "show_input",
        "no_input",
        "remove_tag_config",
        "highlight_extra_classes",
        "include_requirejs",
        "custom_mathjax_url",
        "toc_depth",
    )
    hasher = hashlib.sha256()

    def feed(chunk):
        # Length-prefix every field so that adjacent fields can never run
        # together into the same byte stream.
        hasher.update(len(chunk).to_bytes(8, "big"))
        hasher.update(chunk)

    feed(pathlib.Path(nb_path).read_bytes())
    feed(f"execute={exec_nb}".encode())
    for key in option_names:
        feed(f"{key}={_stable_option_repr(config[key])}".encode())
    return hasher.hexdigest()


def _stable_option_repr(value):
    """Return a text form of ``value`` that ignores dict insertion order."""
    try:
        return json.dumps(value, sort_keys=True, default=repr)
    except (TypeError, ValueError):
        # Keys that cannot be sorted or serialised (or circular data):
        # fall back to the plain repr.
        return repr(value)
294+
295+
296+
def _get_cache_path(cache_dir, cache_key):
    """Build the location of the cache file for ``cache_key`` in ``cache_dir``."""
    file_name = f"{cache_key}.json"
    return pathlib.Path(cache_dir).joinpath(file_name)
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
import json
2+
import os
3+
import pathlib
4+
import tempfile
5+
from unittest.mock import patch
6+
7+
import pytest
8+
9+
from mkdocs_jupyter.plugin import _compute_cache_key, _get_cache_path
10+
11+
12+
@pytest.fixture
def sample_nb(tmp_path):
    """Write a minimal notebook with one Markdown cell and return its path."""
    heading_cell = {
        "cell_type": "markdown",
        "metadata": {},
        "source": ["# Hello World"],
    }
    kernelspec = {
        "display_name": "Python 3",
        "language": "python",
        "name": "python3",
    }
    notebook = {
        "cells": [heading_cell],
        "metadata": {"kernelspec": kernelspec},
        "nbformat": 4,
        "nbformat_minor": 5,
    }
    target = tmp_path / "test.ipynb"
    target.write_text(json.dumps(notebook))
    return target
36+
37+
38+
@pytest.fixture
def base_config():
    """Provide a plain dict of plugin options used as the baseline config."""
    return dict(
        kernel_name="",
        theme="",
        allow_errors=True,
        show_input=True,
        no_input=False,
        remove_tag_config={},
        highlight_extra_classes="",
        include_requirejs=False,
        custom_mathjax_url="",
        toc_depth=6,
    )
52+
53+
54+
class TestComputeCacheKey:
    """Unit tests for ``_compute_cache_key``."""

    def test_same_file_same_config_same_key(self, sample_nb, base_config):
        """Hashing the same file with the same config twice gives one key."""
        nb = str(sample_nb)
        first = _compute_cache_key(nb, base_config, False)
        second = _compute_cache_key(nb, base_config, False)
        assert first == second

    def test_different_file_content_different_key(self, tmp_path, base_config):
        """Two notebooks with different content get different keys."""
        empty_nb = tmp_path / "nb1.ipynb"
        code_nb = tmp_path / "nb2.ipynb"
        empty_nb.write_text(json.dumps({"cells": [], "metadata": {}, "nbformat": 4}))
        code_nb_content = {
            "cells": [{"cell_type": "code"}],
            "metadata": {},
            "nbformat": 4,
        }
        code_nb.write_text(json.dumps(code_nb_content))

        empty_key = _compute_cache_key(str(empty_nb), base_config, False)
        code_key = _compute_cache_key(str(code_nb), base_config, False)
        assert empty_key != code_key

    def test_different_config_different_key(self, sample_nb, base_config):
        """Changing one config option changes the key."""
        baseline = _compute_cache_key(str(sample_nb), base_config, False)

        changed_config = dict(base_config, allow_errors=False)
        changed = _compute_cache_key(str(sample_nb), changed_config, False)
        assert baseline != changed

    def test_different_exec_nb_different_key(self, sample_nb, base_config):
        """Resolved exec_nb (after execute_ignore) affects the key."""
        nb = str(sample_nb)
        executed = _compute_cache_key(nb, base_config, True)
        not_executed = _compute_cache_key(nb, base_config, False)
        assert executed != not_executed

    def test_modified_file_different_key(self, sample_nb, base_config):
        """Rewriting the notebook with an extra cell changes the key."""
        before = _compute_cache_key(str(sample_nb), base_config, False)

        # Append a code cell and write the notebook back in place.
        notebook = json.loads(sample_nb.read_text())
        extra_cell = {"cell_type": "code", "metadata": {}, "source": ["x = 1"]}
        notebook["cells"].append(extra_cell)
        sample_nb.write_text(json.dumps(notebook))

        after = _compute_cache_key(str(sample_nb), base_config, False)
        assert before != after
101+
102+
103+
class TestGetCachePath:
    """Unit tests for ``_get_cache_path``."""

    def test_returns_json_path(self):
        """The cache file is named ``<key>.json`` inside the cache dir."""
        path = _get_cache_path("/tmp/cache", "abc123")
        assert path == pathlib.Path("/tmp/cache/abc123.json")

    def test_path_under_cache_dir(self):
        """A relative cache dir yields a relative path below it."""
        path = _get_cache_path(".cache/mkdocs-jupyter", "deadbeef")
        # Compare the POSIX form: str(path) uses backslashes on Windows, so
        # a comparison against a forward-slash literal would fail there.
        assert path.as_posix() == ".cache/mkdocs-jupyter/deadbeef.json"
111+
112+
113+
class TestCacheIntegration:
    """Integration tests using full mkdocs build."""

    @staticmethod
    def _load_cfg(cache_dir, cache):
        """Load the notebook test-site config with the given cache settings."""
        from mkdocs.config import load_config

        tests_dir = os.path.dirname(os.path.realpath(__file__))
        cfg = load_config(os.path.join(tests_dir, "mkdocs/base-with-nbs.yml"))
        plugin_cfg = cfg["plugins"]["mkdocs-jupyter"].config
        plugin_cfg["cache"] = cache
        plugin_cfg["cache_dir"] = cache_dir
        return cfg

    def test_cache_populated_on_first_build(self):
        """First build should create cache files."""
        from mkdocs.commands.build import build

        with tempfile.TemporaryDirectory() as cache_dir:
            build(self._load_cfg(cache_dir, cache=True))

            created = list(pathlib.Path(cache_dir).glob("*.json"))
            assert len(created) > 0, "Cache files should be created on first build"

    def test_cache_hit_on_second_build(self):
        """Second build should use cached results (nb2html not called again)."""
        from mkdocs.commands.build import build

        with tempfile.TemporaryDirectory() as cache_dir:
            # First build populates cache
            build(self._load_cfg(cache_dir, cache=True))

            # Second build should hit cache — nb2html should not be called
            second_cfg = self._load_cfg(cache_dir, cache=True)
            with patch("mkdocs_jupyter.convert.nb2html") as mock_nb2html:
                build(second_cfg)
                mock_nb2html.assert_not_called()

    def test_cache_disabled(self):
        """When cache=False, no cache files should be created."""
        from mkdocs.commands.build import build

        with tempfile.TemporaryDirectory() as cache_dir:
            build(self._load_cfg(cache_dir, cache=False))

            created = list(pathlib.Path(cache_dir).glob("*.json"))
            assert len(created) == 0, "No cache files when cache is disabled"

    def test_stale_cache_evicted(self):
        """Stale cache files from previous builds are cleaned up."""
        from mkdocs.commands.build import build

        with tempfile.TemporaryDirectory() as cache_dir:
            # Plant a stale cache file before the build runs
            stale = pathlib.Path(cache_dir) / "stale_old_entry.json"
            stale.write_text("{}")

            build(self._load_cfg(cache_dir, cache=True))

            assert not stale.exists(), "Stale cache file should be evicted"
            # But valid cache files should remain
            remaining = list(pathlib.Path(cache_dir).glob("*.json"))
            assert len(remaining) > 0

0 commit comments

Comments
 (0)