Clean up figure processing code

delfanbaum · delfanbaum · commit 5b77721a52d8 · 2023-06-15T12:14:44.000-04:00
Removed the extraneous `build_dir` argument from `process_figures` and
`process_informal_figs`. I believe that in earlier versions we were
moving images ourselves and redefining their paths, but we don't want to
do that anymore now that the book is consumed in Atlas.

Update: improve type hints
diff --git a/jupyter_book_to_htmlbook/code_processing.py b/jupyter_book_to_htmlbook/code_processing.py
@@ -1,10 +1,11 @@
 import re
 import logging
+from typing import Union
 from bs4 import NavigableString  # type: ignore
 from .helpers import base_soup
 
 
-def process_code(chapter, skip_numbering=False):
+def process_code(chapter, skip_numbering: Union[bool, None] = False):
     """
     Turn rendered <pre> blocks into appropriately marked-up HTMLBook
     """
diff --git a/jupyter_book_to_htmlbook/figure_processing.py b/jupyter_book_to_htmlbook/figure_processing.py
@@ -1,8 +1,7 @@
-from pathlib import Path
 from bs4 import NavigableString  # type: ignore
 
 
-def process_figures(chapter, build_dir: Path):
+def process_figures(chapter):
     """
     Takes a chapter soup and handles changing the references to figures
     to the /images directory per usual htmlbook repo
@@ -42,13 +41,13 @@ def process_figures(chapter, build_dir: Path):
     return chapter
 
 
-def process_informal_figs(chapter, build_dir=''):
+def process_informal_figs(chapter):
     """
     This should be run *AFTER* process figs, but basically just repoints the
     img tags.
     """
     for img in chapter.find_all('img'):
-        # Since, weirdly, a myst-marked image will be in a floating anchor,
+        # Since, weirdly, a myst-marked image will be in a floating anchor
         if img.parent.name == 'a':
             img.parent.name = "figure"
             img.parent['class'] = "informal"
diff --git a/jupyter_book_to_htmlbook/file_processing.py b/jupyter_book_to_htmlbook/file_processing.py
@@ -1,7 +1,7 @@
 import logging
 import re
 from pathlib import Path
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 from bs4 import BeautifulSoup  # type: ignore
 from .admonition_processing import process_admonitions
 from .figure_processing import process_figures, process_informal_figs
@@ -151,7 +151,8 @@ def get_main_section(soup):
     return main, bibliography
 
 
-def process_chapter_soup(toc_element: Union[Path, list[Path]]):
+def process_chapter_soup(
+        toc_element: Union[Path, list[Path]]) -> Tuple[BeautifulSoup, str]:
     """ unified file chapter processing """
 
     if isinstance(toc_element, list):  # i.e., an ordered list of chapter parts
@@ -169,8 +170,12 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
     # perform initial swapping and namespace designation
     chapter, bib = get_main_section(base_soup)
 
-    if not chapter:
-        return None, None
+    if not chapter:  # guard against malformed files
+        logging.warning(f"Failed to process {toc_element}.")
+        raise RuntimeError(
+            f"Failed to process {toc_element}. Please check for error in " +
+            "your source file(s). Contact the Tools team for additional " +
+            "support.")
 
     else:
         chapter['xmlns'] = 'http://www.w3.org/1999/xhtml'  # type: ignore
@@ -184,8 +189,8 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
                 subsection, sub_bib = process_chapter_subparts(subfile)
                 chapter.append(subsection)
                 if bib and sub_bib:
-                    entries = sub_bib.find_all("dd")
-                    bib.dl.extend(entries)
+                    entries = sub_bib.find_all("dd")  # type: ignore
+                    bib.dl.extend(entries)  # type: ignore
                 elif sub_bib:
                     bib = sub_bib
 
@@ -233,18 +238,11 @@ def process_chapter(toc_element,
     chapter, ch_name = process_chapter_soup(toc_element)
     logging.info(f"Processing {ch_name}...")
 
-    if not chapter:  # guard against malformed files
-        logging.warning(f"Failed to process {toc_element}.")
-        raise RuntimeError(
-            f"Failed to process {toc_element}. Please check for error in " +
-            "your source file(s). Contact the Tools team for additional " +
-            "support.")
-
     # perform cleans and processing
     chapter = clean_chapter(chapter)
     # note: must process figs before xrefs
-    chapter = process_figures(chapter, build_dir)
-    chapter = process_informal_figs(chapter, build_dir)
+    chapter = process_figures(chapter)
+    chapter = process_informal_figs(chapter)
     chapter = process_internal_refs(chapter)
     chapter = process_citations(chapter)
     chapter = process_footnotes(chapter)
diff --git a/tests/test_figure_processing.py b/tests/test_figure_processing.py
@@ -1,5 +1,4 @@
 import pytest
-from pathlib import Path
 from bs4 import BeautifulSoup as Soup  # type: ignore
 from jupyter_book_to_htmlbook.figure_processing import (
         process_figures,
@@ -47,7 +46,7 @@ def test_simple_figure_case(self):
 <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
 </p></figcaption>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         assert not result.find("figcaption").find("a", class_="headerlink")
         assert result.find("img").get("style") is None
         assert not result.find("span", class_="caption-number")
@@ -66,7 +65,7 @@ def test_markdown_figure_case(self):
 <a class="headerlink" href="#markdown-fig" title="Permalink to this image">
 #</a></p></figcaption></figure>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         assert not result.find("figcaption").find("a", class_="headerlink")
         assert result.find("img").get("style") is None
         assert not result.find("span", class_="caption-number")
@@ -76,7 +75,7 @@ def test_markdown_image(self):
         """ support bare markdown images, i.e., informal figs """
         text = '<p><img alt="Flower" src="../_images/flower.png" /></p>'
         chapter = Soup(text, 'html.parser')
-        result = process_informal_figs(chapter, Path('example'))
+        result = process_informal_figs(chapter)
         assert str(result) == (
             '<figure class="informal"><img alt="Flower" ' +
             'src="../_images/flower.png"/></figure>')
@@ -88,7 +87,7 @@ def test_myst_image(self):
                'e-class align-center" src="_images/flower.png" style="' + \
                'width: 249px; height: 150px;" /></a>'
         chapter = Soup(text, 'html.parser')
-        result = process_informal_figs(chapter, Path('example'))
+        result = process_informal_figs(chapter)
         assert str(result) == (
             '<figure class="informal"><img alt="flower" ' +
             'src="_images/flower.png"/></figure>')
@@ -109,7 +108,7 @@ def test_extra_p_tags_and_spaces_are_removed_from_captions(self):
 <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
 </p></figcaption>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         caption = result.find("figcaption")
         assert not caption.p
         assert "\n" not in caption.string
@@ -130,7 +129,7 @@ def test_markup_is_preserved_in_captions(self):
 <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
 </p></figcaption>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         caption = result.find("figcaption")
         assert not caption.p
         assert caption.find("code")
@@ -152,7 +151,7 @@ def test_markup_is_preserved_in_captions_at_beginning(self):
 <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
 </p></figcaption>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         caption = result.find("figcaption")
         assert caption.find("strong")
         assert caption.find("em")
@@ -175,7 +174,7 @@ def test_no_anchor_wrap(self):
 <a class="headerlink" href="#markdown-fig" title="Permalink to this image">
 #</a></p></figcaption></figure>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         assert result.find("figcaption")
         assert not result.find("figcaption").find("a", class_="headerlink")
         assert result.find("img").get("style") is None
@@ -197,7 +196,7 @@ def test_no_caption_number(self):
 <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
 </p></figcaption>"""
         chapter = Soup(text, 'html.parser')
-        result = process_figures(chapter, Path('example'))
+        result = process_figures(chapter)
         assert result.find("figcaption")
         assert not result.find("figcaption").find("a", class_="headerlink")
         assert result.find("img").get("style") is None
@@ -217,7 +216,7 @@ def test_generated_figure_processing(self, code_generated_figure):
         """
         Minimal generated figure test
         """
-        result = process_figures(code_generated_figure, "")
+        result = process_figures(code_generated_figure)
         assert result.find("figure").get("id") == "code-output-fig"
         assert result.find("figcaption")
         assert not result.find("figcaption").find("a", class_="headerlink")