Skip to content

Commit b4ce507

Browse files
committed
Add handling for non-anchor xref spans
If Jupyter Book can't find a target for a given `{ref}`, it will create a `<span>` tag with `xref` and `std-ref` classes applied to it. Since our formal example workaround adds targets after the fact, any reference to a formal example ends up as a `<span>` rather than an `<a>`, so it is not picked up and converted by our `process_internal_refs` function (which has been renamed in this commit, since there was a typo). This commit/PR adds code to handle these references so authors can xref to the formal code examples, as well as tests to ensure that the conversion happens correctly both on its own and in chapter processing.
1 parent 7077518 commit b4ce507

File tree

3 files changed

+101
-7
lines changed

3 files changed

+101
-7
lines changed

jupyter_book_to_htmlbook/file_processing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from .footnote_processing import process_footnotes
99
from .math_processing import process_math
1010
from .reference_processing import (
11-
process_interal_refs,
11+
process_internal_refs,
12+
process_remaining_refs,
1213
process_ids,
1314
process_citations,
1415
add_glossary_datatypes
@@ -239,7 +240,7 @@ def process_chapter(toc_element,
239240
# note: must process figs before xrefs
240241
chapter = process_figures(chapter, build_dir)
241242
chapter = process_informal_figs(chapter, build_dir)
242-
chapter = process_interal_refs(chapter)
243+
chapter = process_internal_refs(chapter)
243244
chapter = process_citations(chapter)
244245
chapter = process_footnotes(chapter)
245246
chapter = process_admonitions(chapter)
@@ -250,10 +251,13 @@ def process_chapter(toc_element,
250251
chapter = move_span_ids_to_sections(chapter)
251252
chapter = process_sidebars(chapter)
252253
chapter = process_subsections(chapter)
254+
# finally, process any remaining xrefs
255+
chapter = process_remaining_refs(chapter)
253256

254257
if chapter.get("data-type") == "glossary":
255258
add_glossary_datatypes(chapter)
256259

260+
# ensure we have unique IDs across the book
257261
chapter, ids = process_ids(chapter, book_ids)
258262

259263
# write the file, preserving any directory structure(s) from source

jupyter_book_to_htmlbook/reference_processing.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from .helpers import base_soup
55

66

7-
def process_interal_refs(chapter):
7+
def process_internal_refs(chapter):
88
"""
99
Processes internal a tags with "reference internal" classes.
1010
Converts bib references into spans (to deal with later), and other
@@ -45,6 +45,26 @@ def process_interal_refs(chapter):
4545
return chapter
4646

4747

48+
def process_remaining_refs(chapter):
    """
    Processing for any non-internal "xref" classed spans (i.e., those
    that Jupyter Book can't find targets for).

    Every ``<span>`` carrying the ``xref`` class whose ``.string`` is a
    non-empty, whitespace-free value is renamed to ``<a>``, given
    ``data-type="xref"`` and an ``href`` of ``#<text>``, and has its text
    replaced by that same href. Any other such span is left as-is and a
    warning is logged.

    Returns the ``chapter`` object that was passed in (modified in place).
    """
    for ref in chapter.find_all("span", class_="xref"):
        target = ref.string
        # A usable target is a single token: reject spaces as well as tabs
        # and newlines, which would otherwise end up inside the href.
        if target and not any(char.isspace() for char in target):
            href = f"#{target}"
            # convert to proper htmlbook cross reference
            ref.name = "a"
            ref["data-type"] = "xref"
            ref["href"] = href
            ref.string = href
        else:  # in the unlikely case of a badly formatted xref
            logging.warning(
                f"Failed to apply xref formatting to {ref}.")

    return chapter
66+
67+
4868
def process_ids(chapter, existing_ids=[]):
4969
"""
5070
Checks a list of IDs against ids that are already being used in the

tests/test_reference_processing.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import logging
2+
import shutil
23
from bs4 import BeautifulSoup # type: ignore
3-
from jupyter_book_to_htmlbook.reference_processing import process_interal_refs
4+
from jupyter_book_to_htmlbook.file_processing import process_chapter
5+
from jupyter_book_to_htmlbook.reference_processing import (
6+
process_internal_refs,
7+
process_remaining_refs
8+
)
49

510

611
class TestInternalRefs:
@@ -11,7 +16,7 @@ def test_process_internal_refs_reg_xrefs(self):
1116
chapter_text = """<a class="reference internal" href="example.html">
1217
cross reference text</a>"""
1318
chapter = BeautifulSoup(chapter_text, 'html.parser')
14-
result = process_interal_refs(chapter)
19+
result = process_internal_refs(chapter)
1520
assert str(result) == '<a class="reference internal" data-type=' + \
1621
'"xref" href="#example.html">#example.html</a>'
1722

@@ -29,7 +34,7 @@ def test_process_internal_refs_bibliograpy(self):
2934
title="Terry Carver...">Carver, 1993</a>]</span>.</p>
3035
"""
3136
chapter = BeautifulSoup(text, 'html.parser')
32-
result = process_interal_refs(chapter)
37+
result = process_internal_refs(chapter)
3338
assert not result.find("a")
3439
assert "(Baruch 1993)" in result.find("span").contents
3540

@@ -42,7 +47,72 @@ def test_alert_on_external_images(self, caplog):
4247
href="http://example.com/example.png"><img alt="example"
4348
src="http://example.com/example.png" style="width:100px" /></a>"""
4449
chapter = BeautifulSoup(chapter_text, 'html.parser')
45-
result = process_interal_refs(chapter)
50+
result = process_internal_refs(chapter)
4651
assert result == chapter
4752
caplog.set_level(logging.DEBUG)
4853
assert "External image reference:" in caplog.text
54+
55+
56+
class TestStandardRefs:
    """
    Tests around "std-ref" references, which appear as spans (in the case
    where Jupyter Book can't find the actual reference).
    """
    def test_process_xref_spans(self):
        """
        It appears that when an xref doesn't have a target Jupyter Book
        knows about (e.g., in the case of examples), it puts them into
        spans. We should check for these and then convert them
        appropriately.
        """
        chapter = BeautifulSoup("""<p>And here follows a formal code example
(<span class="xref std std-ref">code_example</span>).
Note that the cell has an “example” tag added to its metadata.</p>""",
                                "html.parser")
        result = process_remaining_refs(chapter)
        xref = result.find("a", class_="xref")
        assert xref
        assert xref.get('data-type') == "xref"
        assert xref.get('href') == "#code_example"
        assert xref.string == "#code_example"

    def test_process_xref_spans_bad_ref(self, caplog):
        """
        In the unlikely case wherein we get a bad xref (i.e., one with
        spaces or code in it), we log that failure and do nothing
        """
        chapter = BeautifulSoup("""<p>And here follows a formal code example
(<span class="xref std std-ref">code example</span>). Another is
<span class="xref std std-ref"><span>some_</span><em>code_example</em></span>.
Note that the cell has an “example” tag added to its metadata.</p>""",
                                "html.parser")
        # set the capture level before running the code under test so it
        # actually applies to the records we assert on
        caplog.set_level(logging.DEBUG)
        process_remaining_refs(chapter)
        log = caplog.text
        assert "Failed to apply" in log
        assert "code example" in log
        assert "<em>code_example</em>" in log

    def test_examples_refs_in_chapter_processing(self, tmp_path):
        """
        More an integration test, ensuring that when we process a chapter
        a reference to a formal example comes out as an htmlbook xref
        anchor pointing at the example's id
        """
        test_env = tmp_path / 'tmp'
        test_out = test_env / 'output'
        test_env.mkdir()
        test_out.mkdir()
        shutil.copytree('tests/example_book/_build/html/notebooks',
                        test_env, dirs_exist_ok=True)

        process_chapter(test_env / "code_py.html",
                        test_env, test_out)
        with open(test_out / 'code_py.html') as f:
            soup = BeautifulSoup(f.read(), "html.parser")

        xref = soup.find("a", class_="xref")
        assert xref
        assert xref.get("data-type") == "xref"
        assert xref.get("href") == "#hello_tim"
        assert xref.string == "#hello_tim"

0 commit comments

Comments
 (0)