Skip to content

Commit 0e802fd

Browse files
authored
Merge pull request #35 from oreillymedia/example_xref_bug_fix
Handle references to formal examples
2 parents 7077518 + b4ce507 commit 0e802fd

File tree

3 files changed

+101
-7
lines changed

3 files changed

+101
-7
lines changed

jupyter_book_to_htmlbook/file_processing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from .footnote_processing import process_footnotes
99
from .math_processing import process_math
1010
from .reference_processing import (
11-
process_interal_refs,
11+
process_internal_refs,
12+
process_remaining_refs,
1213
process_ids,
1314
process_citations,
1415
add_glossary_datatypes
@@ -239,7 +240,7 @@ def process_chapter(toc_element,
239240
# note: must process figs before xrefs
240241
chapter = process_figures(chapter, build_dir)
241242
chapter = process_informal_figs(chapter, build_dir)
242-
chapter = process_interal_refs(chapter)
243+
chapter = process_internal_refs(chapter)
243244
chapter = process_citations(chapter)
244245
chapter = process_footnotes(chapter)
245246
chapter = process_admonitions(chapter)
@@ -250,10 +251,13 @@ def process_chapter(toc_element,
250251
chapter = move_span_ids_to_sections(chapter)
251252
chapter = process_sidebars(chapter)
252253
chapter = process_subsections(chapter)
254+
# finally, process any remaining xrefs
255+
chapter = process_remaining_refs(chapter)
253256

254257
if chapter.get("data-type") == "glossary":
255258
add_glossary_datatypes(chapter)
256259

260+
# ensure we have unique IDs across the book
257261
chapter, ids = process_ids(chapter, book_ids)
258262

259263
# write the file, preserving any directory structure(s) from source

jupyter_book_to_htmlbook/reference_processing.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from .helpers import base_soup
55

66

7-
def process_interal_refs(chapter):
7+
def process_internal_refs(chapter):
88
"""
99
Processes internal a tags with "reference internal" classes.
1010
Converts bib references into spans (to deal with later), and other
@@ -45,6 +45,26 @@ def process_interal_refs(chapter):
4545
return chapter
4646

4747

48+
def process_remaining_refs(chapter):
    """
    Convert leftover "xref"-classed spans into HTMLBook cross references.

    Jupyter Book leaves a reference as a ``<span class="xref ...">`` when
    it can't find a target for it (e.g., references to formal examples).
    Each such span whose content is a single bare identifier is rewritten
    in place as ``<a data-type="xref" href="#identifier">#identifier</a>``.

    Spans that can't be converted (nested markup, empty content, or any
    whitespace in the identifier) are left untouched and a warning is
    logged.

    Returns the same chapter object that was passed in.
    """
    for ref in chapter.find_all("span", class_="xref"):
        # .string is None when the span wraps more than one child node
        target = ref.string

        # Reject *any* whitespace, not only a plain space: a tab or a
        # newline inside the identifier would yield a broken fragment.
        if not target or any(char.isspace() for char in target):
            # in the unlikely case of a badly formatted xref
            logging.warning(
                "Failed to apply xref formatting to %s.", ref)
            continue

        # convert to proper htmlbook cross reference
        href = f"#{target}"
        ref.name = "a"
        ref["data-type"] = "xref"
        ref["href"] = href
        # the link text mirrors the href
        ref.string = href

    return chapter
66+
67+
4868
def process_ids(chapter, existing_ids=[]):
4969
"""
5070
Checks a list of IDs against ids that are already being used in the

tests/test_reference_processing.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import logging
2+
import shutil
23
from bs4 import BeautifulSoup # type: ignore
3-
from jupyter_book_to_htmlbook.reference_processing import process_interal_refs
4+
from jupyter_book_to_htmlbook.file_processing import process_chapter
5+
from jupyter_book_to_htmlbook.reference_processing import (
6+
process_internal_refs,
7+
process_remaining_refs
8+
)
49

510

611
class TestInternalRefs:
@@ -11,7 +16,7 @@ def test_process_internal_refs_reg_xrefs(self):
1116
chapter_text = """<a class="reference internal" href="example.html">
1217
cross reference text</a>"""
1318
chapter = BeautifulSoup(chapter_text, 'html.parser')
14-
result = process_interal_refs(chapter)
19+
result = process_internal_refs(chapter)
1520
assert str(result) == '<a class="reference internal" data-type=' + \
1621
'"xref" href="#example.html">#example.html</a>'
1722

@@ -29,7 +34,7 @@ def test_process_internal_refs_bibliograpy(self):
2934
title="Terry Carver...">Carver, 1993</a>]</span>.</p>
3035
"""
3136
chapter = BeautifulSoup(text, 'html.parser')
32-
result = process_interal_refs(chapter)
37+
result = process_internal_refs(chapter)
3338
assert not result.find("a")
3439
assert "(Baruch 1993)" in result.find("span").contents
3540

@@ -42,7 +47,72 @@ def test_alert_on_external_images(self, caplog):
4247
href="http://example.com/example.png"><img alt="example"
4348
src="http://example.com/example.png" style="width:100px" /></a>"""
4449
chapter = BeautifulSoup(chapter_text, 'html.parser')
45-
result = process_interal_refs(chapter)
50+
result = process_internal_refs(chapter)
4651
assert result == chapter
4752
caplog.set_level(logging.DEBUG)
4853
assert "External image reference:" in caplog.text
54+
55+
56+
class TestStandardRefs:
    """
    Tests around "std-ref" references, which appear as spans (in the case
    where Jupyter Book can't find the actual reference).
    """
    def test_process_xref_spans(self):
        """
        It appears that when an xref doesn't have a target Jupyter knows
        about (e.g., in the case of examples), it puts them into spans. We
        should check for these and then convert them appropriately.
        """
        chapter = BeautifulSoup("""<p>And here follows a formal code example
        (<span class="xref std std-ref">code_example</span>).
        Note that the cell has an “example” tag added to its metadata.</p>""",
                                "html.parser")
        result = process_remaining_refs(chapter)
        xref = result.find("a", class_="xref")
        assert xref
        assert xref.get('data-type') == "xref"
        assert xref.get('href') == "#code_example"
        assert xref.string == "#code_example"

    def test_process_xref_spans_bad_ref(self, caplog):
        """
        In the unlikely case wherein we get a bad xref (i.e., one with
        spaces or code in it), we log that failure and do nothing.
        """
        chapter = BeautifulSoup("""<p>And here follows a formal code example
        (<span class="xref std std-ref">code example</span>). Another is
        <span class="xref std std-ref"><span>some_</span><em>code_example</em></span>.
        Note that the cell has an “example” tag added to its metadata.</p>""",
                                "html.parser")
        # the capture level must be set *before* the code under test runs,
        # otherwise it has no bearing on which records get captured
        caplog.set_level(logging.DEBUG)
        process_remaining_refs(chapter)
        log = caplog.text
        assert "Failed to apply" in log
        assert "code example" in log
        assert "<em>code_example</em>" in log

    def test_examples_refs_in_chapter_processing(self, tmp_path):
        """
        More of an integration test: run a whole chapter through
        process_chapter and check that the written output contains the
        example reference as an HTMLBook xref link.
        """
        test_env = tmp_path / 'tmp'
        test_out = test_env / 'output'
        test_env.mkdir()
        test_out.mkdir()
        shutil.copytree('tests/example_book/_build/html/notebooks',
                        test_env, dirs_exist_ok=True)

        process_chapter(test_env / "code_py.html",
                        test_env, test_out)
        with open(test_out / 'code_py.html') as f:
            soup = BeautifulSoup(f.read(), "html.parser")

        xref = soup.find("a", class_="xref")
        assert xref
        assert xref.get("data-type") == "xref"
        assert xref.get("href") == "#hello_tim"
        assert xref.string == "#hello_tim"

0 commit comments

Comments
 (0)