Skip to content

Commit 07f7aa8

Browse files
committed
Guard against figs wrapped in paragraphs
This is an edge case (in fact, I'm not entirely sure how to reproduce it from Jupyter Book, but an author has managed it), but figures wrapped in <p> tags do cause issues in Atlas. This adds a simple guard against that, along with an associated test for coverage.
1 parent c70efa1 commit 07f7aa8

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

jupyter_book_to_htmlbook/figure_processing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ def process_figures(chapter):
88
"""
99
figures = chapter.find_all("figure")
1010
for figure in figures:
11+
12+
if figure.parent.name == "p":
13+
figure.parent.unwrap()
14+
1115
# clean anything extraneous, if extant
1216
if figure.find_all('a', class_="headerlink") != []:
1317
for anchor in figure.find_all('a', class_="headerlink"):

tests/test_figure_processing.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,36 @@ def test_no_caption_number(self):
202202
assert result.find("img").get("style") is None
203203
assert not result.find("span", class_="caption-number")
204204

205+
def test_fig_wrapped_in_p_is_unwrapped(self):
    """
    Figures must never be left wrapped inside a paragraph.

    Edge case where figures sometimes end up wrapped inside <p> tags,
    which causes issues with rendering in Atlas. The section under test
    holds a formal figure wrapped in a <p> and an image-only paragraph
    that is expected to come out of process_informal_figs as a figure.
    After both processors run, there should be exactly two figures,
    neither may have a <p> parent, and no <p> may remain in the chapter.
    """
    # The <figure> is closed explicitly so the fixture is well formed
    # apart from the <p> wrapper, rather than relying on the parser to
    # recover from an unclosed element.
    fig = """<p><figure class="align-default" id="example-fig">
    <a class="reference internal image-reference" href="images/flower.png">
    <img alt="images/flower.png" src="../_images/flower.png"
    style="height: 150px;" /></a>
    <figcaption>
    <p><span class="caption-number">Fig. 1 </span><span class="caption-text">
    Here is my figure caption!</span>
    <a class="headerlink" href="#example-fig" title="Permalink to this image">#</a>
    </p></figcaption></figure></p>"""
    informal_fig = '<p><img alt="Flower" src="flower.png" /></p>'
    text = f"<section>{fig}{informal_fig}</section>"
    chapter = Soup(text, "html.parser")

    process_figures(chapter)
    process_informal_figs(chapter)

    figs = chapter.find_all("figure")
    assert len(figs) == 2

    # Distinct loop name: the original reused `fig`, shadowing the
    # fixture string above.
    for figure in figs:
        assert figure.parent.name != "p"

    assert not chapter.find('p')
234+
205235

206236
class TestGeneratedFigureProcessing:
207237
"""

0 commit comments

Comments
 (0)