Skip to content

Commit 5b77721

Browse files
committed
Clean up figure processing code
Removed some extraneous arguments. I believe that in earlier versions we were moving images ourselves and redefining their paths, but we no longer want to do that now that the book is consumed in Atlas. Update: improved type hints.
1 parent 64e2705 commit 5b77721

File tree

4 files changed

+28
-31
lines changed

4 files changed

+28
-31
lines changed

jupyter_book_to_htmlbook/code_processing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import re
22
import logging
3+
from typing import Union
34
from bs4 import NavigableString # type: ignore
45
from .helpers import base_soup
56

67

7-
def process_code(chapter, skip_numbering=False):
8+
def process_code(chapter, skip_numbering: Union[bool, None] = False):
89
"""
910
Turn rendered <pre> blocks into appropriately marked-up HTMLBook
1011
"""

jupyter_book_to_htmlbook/figure_processing.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
from pathlib import Path
21
from bs4 import NavigableString # type: ignore
32

43

5-
def process_figures(chapter, build_dir: Path):
4+
def process_figures(chapter):
65
"""
76
Takes a chapter soup and handles changing the references to figures
87
to the /images directory per usual htmlbook repo
@@ -42,13 +41,13 @@ def process_figures(chapter, build_dir: Path):
4241
return chapter
4342

4443

45-
def process_informal_figs(chapter, build_dir=''):
44+
def process_informal_figs(chapter):
4645
"""
4746
This should be run *AFTER* process figs, but basically just repoints the
4847
img tags.
4948
"""
5049
for img in chapter.find_all('img'):
51-
# Since, weirdly, a myst-marked image will be in a floating anchor,
50+
# Since, weirdly, a myst-marked image will be in a floating anchor
5251
if img.parent.name == 'a':
5352
img.parent.name = "figure"
5453
img.parent['class'] = "informal"

jupyter_book_to_htmlbook/file_processing.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import re
33
from pathlib import Path
4-
from typing import Union, Optional
4+
from typing import Union, Optional, Tuple
55
from bs4 import BeautifulSoup # type: ignore
66
from .admonition_processing import process_admonitions
77
from .figure_processing import process_figures, process_informal_figs
@@ -151,7 +151,8 @@ def get_main_section(soup):
151151
return main, bibliography
152152

153153

154-
def process_chapter_soup(toc_element: Union[Path, list[Path]]):
154+
def process_chapter_soup(
155+
toc_element: Union[Path, list[Path]]) -> Tuple[BeautifulSoup, str]:
155156
""" unified file chapter processing """
156157

157158
if isinstance(toc_element, list): # i.e., an ordered list of chapter parts
@@ -169,8 +170,12 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
169170
# perform initial swapping and namespace designation
170171
chapter, bib = get_main_section(base_soup)
171172

172-
if not chapter:
173-
return None, None
173+
if not chapter: # guard against malformed files
174+
logging.warning(f"Failed to process {toc_element}.")
175+
raise RuntimeError(
176+
f"Failed to process {toc_element}. Please check for error in " +
177+
"your source file(s). Contact the Tools team for additional " +
178+
"support.")
174179

175180
else:
176181
chapter['xmlns'] = 'http://www.w3.org/1999/xhtml' # type: ignore
@@ -184,8 +189,8 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
184189
subsection, sub_bib = process_chapter_subparts(subfile)
185190
chapter.append(subsection)
186191
if bib and sub_bib:
187-
entries = sub_bib.find_all("dd")
188-
bib.dl.extend(entries)
192+
entries = sub_bib.find_all("dd") # type: ignore
193+
bib.dl.extend(entries) # type: ignore
189194
elif sub_bib:
190195
bib = sub_bib
191196

@@ -233,18 +238,11 @@ def process_chapter(toc_element,
233238
chapter, ch_name = process_chapter_soup(toc_element)
234239
logging.info(f"Processing {ch_name}...")
235240

236-
if not chapter: # guard against malformed files
237-
logging.warning(f"Failed to process {toc_element}.")
238-
raise RuntimeError(
239-
f"Failed to process {toc_element}. Please check for error in " +
240-
"your source file(s). Contact the Tools team for additional " +
241-
"support.")
242-
243241
# perform cleans and processing
244242
chapter = clean_chapter(chapter)
245243
# note: must process figs before xrefs
246-
chapter = process_figures(chapter, build_dir)
247-
chapter = process_informal_figs(chapter, build_dir)
244+
chapter = process_figures(chapter)
245+
chapter = process_informal_figs(chapter)
248246
chapter = process_internal_refs(chapter)
249247
chapter = process_citations(chapter)
250248
chapter = process_footnotes(chapter)

tests/test_figure_processing.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import pytest
2-
from pathlib import Path
32
from bs4 import BeautifulSoup as Soup # type: ignore
43
from jupyter_book_to_htmlbook.figure_processing import (
54
process_figures,
@@ -47,7 +46,7 @@ def test_simple_figure_case(self):
4746
<a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
4847
</p></figcaption>"""
4948
chapter = Soup(text, 'html.parser')
50-
result = process_figures(chapter, Path('example'))
49+
result = process_figures(chapter)
5150
assert not result.find("figcaption").find("a", class_="headerlink")
5251
assert result.find("img").get("style") is None
5352
assert not result.find("span", class_="caption-number")
@@ -66,7 +65,7 @@ def test_markdown_figure_case(self):
6665
<a class="headerlink" href="#markdown-fig" title="Permalink to this image">
6766
#</a></p></figcaption></figure>"""
6867
chapter = Soup(text, 'html.parser')
69-
result = process_figures(chapter, Path('example'))
68+
result = process_figures(chapter)
7069
assert not result.find("figcaption").find("a", class_="headerlink")
7170
assert result.find("img").get("style") is None
7271
assert not result.find("span", class_="caption-number")
@@ -76,7 +75,7 @@ def test_markdown_image(self):
7675
""" support bare markdown images, i.e., informal figs """
7776
text = '<p><img alt="Flower" src="../_images/flower.png" /></p>'
7877
chapter = Soup(text, 'html.parser')
79-
result = process_informal_figs(chapter, Path('example'))
78+
result = process_informal_figs(chapter)
8079
assert str(result) == (
8180
'<figure class="informal"><img alt="Flower" ' +
8281
'src="../_images/flower.png"/></figure>')
@@ -88,7 +87,7 @@ def test_myst_image(self):
8887
'e-class align-center" src="_images/flower.png" style="' + \
8988
'width: 249px; height: 150px;" /></a>'
9089
chapter = Soup(text, 'html.parser')
91-
result = process_informal_figs(chapter, Path('example'))
90+
result = process_informal_figs(chapter)
9291
assert str(result) == (
9392
'<figure class="informal"><img alt="flower" ' +
9493
'src="_images/flower.png"/></figure>')
@@ -109,7 +108,7 @@ def test_extra_p_tags_and_spaces_are_removed_from_captions(self):
109108
<a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
110109
</p></figcaption>"""
111110
chapter = Soup(text, 'html.parser')
112-
result = process_figures(chapter, Path('example'))
111+
result = process_figures(chapter)
113112
caption = result.find("figcaption")
114113
assert not caption.p
115114
assert "\n" not in caption.string
@@ -130,7 +129,7 @@ def test_markup_is_preserved_in_captions(self):
130129
<a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
131130
</p></figcaption>"""
132131
chapter = Soup(text, 'html.parser')
133-
result = process_figures(chapter, Path('example'))
132+
result = process_figures(chapter)
134133
caption = result.find("figcaption")
135134
assert not caption.p
136135
assert caption.find("code")
@@ -152,7 +151,7 @@ def test_markup_is_preserved_in_captions_at_beginning(self):
152151
<a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
153152
</p></figcaption>"""
154153
chapter = Soup(text, 'html.parser')
155-
result = process_figures(chapter, Path('example'))
154+
result = process_figures(chapter)
156155
caption = result.find("figcaption")
157156
assert caption.find("strong")
158157
assert caption.find("em")
@@ -175,7 +174,7 @@ def test_no_anchor_wrap(self):
175174
<a class="headerlink" href="#markdown-fig" title="Permalink to this image">
176175
#</a></p></figcaption></figure>"""
177176
chapter = Soup(text, 'html.parser')
178-
result = process_figures(chapter, Path('example'))
177+
result = process_figures(chapter)
179178
assert result.find("figcaption")
180179
assert not result.find("figcaption").find("a", class_="headerlink")
181180
assert result.find("img").get("style") is None
@@ -197,7 +196,7 @@ def test_no_caption_number(self):
197196
<a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
198197
</p></figcaption>"""
199198
chapter = Soup(text, 'html.parser')
200-
result = process_figures(chapter, Path('example'))
199+
result = process_figures(chapter)
201200
assert result.find("figcaption")
202201
assert not result.find("figcaption").find("a", class_="headerlink")
203202
assert result.find("img").get("style") is None
@@ -217,7 +216,7 @@ def test_generated_figure_processing(self, code_generated_figure):
217216
"""
218217
Minimal generated figure test
219218
"""
220-
result = process_figures(code_generated_figure, "")
219+
result = process_figures(code_generated_figure)
221220
assert result.find("figure").get("id") == "code-output-fig"
222221
assert result.find("figcaption")
223222
assert not result.find("figcaption").find("a", class_="headerlink")

0 commit comments

Comments
 (0)