Merge pull request #35 from oreillymedia/example_xref_bug_fix

delfanbaum · web-flow · commit 0e802fdcb788 · 2023-01-13T13:27:49.000-06:00
Handle references to formal examples
diff --git a/jupyter_book_to_htmlbook/file_processing.py b/jupyter_book_to_htmlbook/file_processing.py
@@ -8,7 +8,8 @@
 from .footnote_processing import process_footnotes
 from .math_processing import process_math
 from .reference_processing import (
-        process_interal_refs,
+        process_internal_refs,
+        process_remaining_refs,
         process_ids,
         process_citations,
         add_glossary_datatypes
@@ -239,7 +240,7 @@ def process_chapter(toc_element,
     # note: must process figs before xrefs
     chapter = process_figures(chapter, build_dir)
     chapter = process_informal_figs(chapter, build_dir)
-    chapter = process_interal_refs(chapter)
+    chapter = process_internal_refs(chapter)
     chapter = process_citations(chapter)
     chapter = process_footnotes(chapter)
     chapter = process_admonitions(chapter)
@@ -250,10 +251,13 @@ def process_chapter(toc_element,
     chapter = move_span_ids_to_sections(chapter)
     chapter = process_sidebars(chapter)
     chapter = process_subsections(chapter)
+    # finally, process any remaining xrefs
+    chapter = process_remaining_refs(chapter)
 
     if chapter.get("data-type") == "glossary":
         add_glossary_datatypes(chapter)
 
+    # ensure we have unique IDs across the book
     chapter, ids = process_ids(chapter, book_ids)
 
     # write the file, preserving any directory structure(s) from source
diff --git a/jupyter_book_to_htmlbook/reference_processing.py b/jupyter_book_to_htmlbook/reference_processing.py
@@ -4,7 +4,7 @@
 from .helpers import base_soup
 
 
-def process_interal_refs(chapter):
+def process_internal_refs(chapter):
     """
     Processes internal a tags with "reference internal" classes.
     Converts bib references into spans (to deal with later), and other
@@ -45,6 +45,26 @@ def process_interal_refs(chapter):
     return chapter
 
 
+def process_remaining_refs(chapter):
+    """
+    Converts any remaining "xref"-classed spans (i.e., those that Jupyter
+    can't find targets for) into HTMLBook xref links; logs malformed ones
+    """
+    xrefs = chapter.find_all("span", class_="xref")
+    for ref in xrefs:
+        # convert to proper htmlbook cross reference
+        if ref.string and ref.string.find(" ") == -1:
+            ref.name = "a"
+            ref["data-type"] = "xref"
+            ref["href"] = f"#{ref.string}"
+            ref.string = ref.get("href")
+        else:  # in the unlikely case of a badly formatted xref
+            logging.warning(
+                f"Failed to apply xref formatting to {ref}.")
+
+    return chapter
+
+
 def process_ids(chapter, existing_ids=[]):
     """
     Checks a list of IDs against ids that are already being used in the
diff --git a/tests/test_reference_processing.py b/tests/test_reference_processing.py
@@ -1,6 +1,11 @@
 import logging
+import shutil
 from bs4 import BeautifulSoup  # type: ignore
-from jupyter_book_to_htmlbook.reference_processing import process_interal_refs
+from jupyter_book_to_htmlbook.file_processing import process_chapter
+from jupyter_book_to_htmlbook.reference_processing import (
+        process_internal_refs,
+        process_remaining_refs
+    )
 
 
 class TestInternalRefs:
@@ -11,7 +16,7 @@ def test_process_internal_refs_reg_xrefs(self):
         chapter_text = """<a class="reference internal" href="example.html">
         cross reference text</a>"""
         chapter = BeautifulSoup(chapter_text, 'html.parser')
-        result = process_interal_refs(chapter)
+        result = process_internal_refs(chapter)
         assert str(result) == '<a class="reference internal" data-type=' + \
                               '"xref" href="#example.html">#example.html</a>'
 
@@ -29,7 +34,7 @@ def test_process_internal_refs_bibliograpy(self):
 title="Terry Carver...">Carver, 1993</a>]</span>.</p>
 """
         chapter = BeautifulSoup(text, 'html.parser')
-        result = process_interal_refs(chapter)
+        result = process_internal_refs(chapter)
         assert not result.find("a")
         assert "(Baruch 1993)" in result.find("span").contents
 
@@ -42,7 +47,72 @@ def test_alert_on_external_images(self, caplog):
     href="http://example.com/example.png"><img alt="example"
     src="http://example.com/example.png" style="width:100px" /></a>"""
         chapter = BeautifulSoup(chapter_text, 'html.parser')
-        result = process_interal_refs(chapter)
+        result = process_internal_refs(chapter)
         assert result == chapter
         caplog.set_level(logging.DEBUG)
         assert "External image reference:" in caplog.text
+
+
+class TestStandardRefs:
+    """
+    Tests around "std-ref" references, which appear as spans (in the case
+    where Jupyter Book can't find the actual reference).
+    """
+    def test_process_xref_spans(self):
+        """
+        It appears that when an xref doesn't have a target Jupyter Book knows
+        about (e.g., in the case of examples), it is put into a span. We
+        should check for these and then convert them appropriately.
+        """
+        chapter = BeautifulSoup("""<p>And here follows a formal code example
+(<span class="xref std std-ref">code_example</span>).
+Note that the cell has an “example” tag added to its metadata.</p>""",
+                                "html.parser")
+        result = process_remaining_refs(chapter)
+        xref = result.find("a", class_="xref")
+        assert xref
+        assert xref.get('data-type') == "xref"
+        assert xref.get('href') == "#code_example"
+        assert xref.string == "#code_example"
+
+    def test_process_xref_spans_bad_ref(self, caplog):
+        """
+        In the unlikely case where we get a bad xref (i.e., one with spaces
+        or nested markup in it), we log that failure and leave it unchanged
+        """
+        chapter = BeautifulSoup("""<p>And here follows a formal code example
+(<span class="xref std std-ref">code example</span>). Another is
+<span class="xref std std-ref"><span>some_</span><em>code_example</em></span>.
+Note that the cell has an “example” tag added to its metadata.</p>""",
+                                "html.parser")
+        process_remaining_refs(chapter)
+        caplog.set_level(logging.DEBUG)
+        log = caplog.text
+        assert "Failed to apply" in log
+        assert "code example" in log
+        assert "<em>code_example</em>" in log
+
+    def test_examples_refs_in_chapter_processing(self, tmp_path):
+        """
+        More of an integration test, ensuring that when we process a chapter
+        the references to formal examples are converted into HTMLBook
+        cross references (i.e., `a` tags with an "xref" data-type)
+        """
+        test_env = tmp_path / 'tmp'
+        test_out = test_env / 'output'
+        test_env.mkdir()
+        test_out.mkdir()
+        shutil.copytree('tests/example_book/_build/html/notebooks',
+                        test_env, dirs_exist_ok=True)
+
+        process_chapter(test_env / "code_py.html",
+                        test_env, test_out)
+        with open(test_out / 'code_py.html') as f:
+            soup = BeautifulSoup(f.read(), "html.parser")
+
+        xref = soup.find("a", class_="xref")
+        assert xref
+        assert xref.get("href") == "#hello_tim"
+        assert xref.get("data-type") == "xref"
+        assert xref.get("href") == "#hello_tim"
+        assert xref.string == "#hello_tim"