Skip to content

Commit b0e98da

Browse files
authored
Merge pull request #52 from oreillymedia/bug_fix_multi_top_level_sub_chapters
Bug fix multi top level sub chapters
2 parents 60e5618 + 9f5d7e8 commit b0e98da

19 files changed

+798
-32
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ Options:
7474

7575
## Release Notes
7676

77+
### 1.1.1
78+
79+
Bug fix:
80+
- Don't drop additional top-level headings in subchapter files
81+
7782
### 1.1.0
7883

7984
Features:

jupyter_book_to_htmlbook/file_processing.py

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -135,22 +135,66 @@ def apply_datatype(chapter, ch_name):
135135
return chapter
136136

137137

138+
def get_top_level_sections(soup):
    """
    Collect the top-level <section> elements of a chapter's main <article>.

    Every ``<article role="main">`` in ``soup`` is looked up, and the result
    depends on how many were found:

    * none: a partial file (a test case, not expected in production); the
      result of ``soup.find_all('section')`` is returned as-is.
    * exactly one: the <section> elements that are direct children of that
      article are returned, leaving out the one whose id is "bibliography".
    * more than one: the chapter is malformed; a warning naming the chapter
      is logged and printed, and an empty list is returned.

    The return value is always a (possibly empty) list-like of sections, so
    callers can index, measure, or iterate it without special-casing.
    """
    section_wrappers = soup.find_all("article", attrs={"role": "main"})

    if len(section_wrappers) > 1:
        # Prefer the heading inside the first main article; fall back to the
        # first <h1> anywhere in the document.
        heading = section_wrappers[0].find('h1')
        if heading is None:
            heading = soup.find("h1")
        # Report the heading's text rather than the tag's markup.
        main_title = heading.get_text() if heading is not None else None
        message = f"The chapter with title '{main_title}' is malformed."
        logging.warning(message)
        print("Warning: " + message)
        # An empty list (not a tuple of Nones) keeps the return type uniform:
        # callers that index get an IndexError, callers that loop do nothing.
        return []

    # test case for partial files, not expected in production
    if not section_wrappers:
        return soup.find_all('section')

    main = section_wrappers[0]
    # Only direct children count as top-level; nested sections stay inside
    # their parents, and the bibliography is handled separately by callers.
    return [
        element for element in main.children
        if element.name == "section" and element.get('id') != "bibliography"
    ]
169+
170+
138171
def get_main_section(soup):
    """
    Gets the main "section," or the main chapter text, and additionally
    checks to see if there is a separate bibliography section, returning
    that if it exists to be dealt with later.

    Returns a ``(main, bibliography)`` pair. ``main`` is the first top-level
    section reported by ``get_top_level_sections`` or None when there is
    none; ``bibliography`` is the result of looking up the <section> whose
    id is "bibliography" (None when absent). When more than one top-level
    section is present, a warning that the extra sections will not be
    processed is logged and printed.
    """
    # Ignore None placeholders (or a None result) from the helper so that a
    # chapter it could not parse counts as having no sections rather than as
    # having "extra" ones.
    sections = [
        section for section in (get_top_level_sections(soup) or [])
        if section is not None
    ]
    main = sections[0] if sections else None

    if len(sections) > 1:
        # Prefer the heading inside the main article; fall back to the first
        # <h1> anywhere in the document.
        heading = None
        article = soup.find('article', attrs={"role": "main"})
        if article is not None:
            heading = article.find('h1')
        if heading is None:
            heading = soup.find("h1")
        # Report the heading's text rather than the tag's markup.
        main_title = heading.get_text() if heading is not None else None
        err_msg = f"The chapter with title '{main_title}' " + \
                  "has extra <section>s " + \
                  "that will not be processed. Please check the " + \
                  "notebook source files."
        logging.warning(err_msg)
        print(err_msg)

    bibliography = soup.find('section', id="bibliography")

    return main, bibliography
155199

156200

@@ -172,11 +216,14 @@ def process_chapter_soup(
172216

173217
# perform initial swapping and namespace designation
174218
chapter, bib = get_main_section(base_soup)
219+
if bib and not chapter: # bibs can be their own chapters
220+
chapter = bib
221+
bib = None
175222

176223
if not chapter: # guard against malformed files
177224
logging.warning(f"Failed to process {toc_element}.")
178225
raise RuntimeError(
179-
f"Failed to process {toc_element}. Please check for error in " +
226+
f"Failed to process {toc_element}. Please check for errors in " +
180227
"your source file(s). Contact the Tools team for additional " +
181228
"support.")
182229

@@ -189,8 +236,10 @@ def process_chapter_soup(
189236

190237
if chapter_parts:
191238
for subfile in chapter_parts:
192-
subsection, sub_bib = process_chapter_subparts(subfile)
193-
chapter.append(subsection)
239+
subsections, sub_bib = process_chapter_subparts(subfile)
240+
if subsections:
241+
for subsection in subsections:
242+
chapter.append(subsection)
194243
if bib and sub_bib:
195244
entries = sub_bib.find_all("dd") # type: ignore
196245
bib.dl.extend(entries) # type: ignore
@@ -211,19 +260,24 @@ def process_chapter_subparts(subfile):
211260
""" processing for chapters with "sections" """
212261
with open(subfile, 'r') as f:
213262
soup = BeautifulSoup(f, 'lxml')
214-
section, bib = get_main_section(soup)
215-
section['data-type'] = 'sect1' # type: ignore
216-
del section['class'] # type: ignore
217-
# move id from empty span to section
218-
try:
219-
section['id'] = section.select_one('span')['id'] # type: ignore
220-
except TypeError:
221-
# fun fact, this happens when there's not numbering on the toc
222-
pass # like before, if it's not there that's OK.
223-
except KeyError:
224-
# fun fact, this happens when there is numbering on the toc
225-
pass # like before, if it's not there that's OK.
226-
return section, bib
263+
top_level_sections = get_top_level_sections(soup)
264+
265+
for section in top_level_sections:
266+
section['data-type'] = 'sect1' # type: ignore
267+
del section['class'] # type: ignore
268+
# move id from empty span to section
269+
try:
270+
section['id'] = section.select_one( # type: ignore
271+
'span')['id']
272+
except TypeError:
273+
# this happens when there's no numbering on the toc
274+
pass # like before, if it's not there that's OK.
275+
except KeyError:
276+
# fun fact, this happens when there is numbering on the toc
277+
pass # like before, if it's not there that's OK.
278+
bibliography = soup.find('section', id="bibliography")
279+
280+
return top_level_sections, bibliography
227281

228282

229283
def process_chapter(toc_element,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "jupyter-book-to-htmlbook"
3-
version = "1.1.0"
3+
version = "1.1.1"
44
description = "A script to convert jupyter book html files to htmlbook for consumption in Atlas"
55
authors = ["delfanbaum"]
66

tests/example_book/_build/html/bibliography.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@
163163
<ul class="nav bd-sidenav">
164164
<li class="toctree-l1"><a class="reference internal" href="notebooks/code_py.html">Code</a></li>
165165
<li class="toctree-l1"><a class="reference internal" href="notebooks/code_r.html">This Is a Title</a></li>
166+
</ul>
167+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
168+
<ul class="nav bd-sidenav">
169+
<li class="toctree-l1"><a class="reference internal" href="notebooks/many_a_levels.html">My Main Title</a></li>
170+
166171
</ul>
167172
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
168173
<ul class="current nav bd-sidenav">

tests/example_book/_build/html/notebooks/ch01.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@
164164
<ul class="nav bd-sidenav">
165165
<li class="toctree-l1"><a class="reference internal" href="code_py.html">Code</a></li>
166166
<li class="toctree-l1"><a class="reference internal" href="code_r.html">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">

tests/example_book/_build/html/notebooks/ch02.00.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@
164164
<ul class="nav bd-sidenav">
165165
<li class="toctree-l1"><a class="reference internal" href="code_py.html">Code</a></li>
166166
<li class="toctree-l1"><a class="reference internal" href="code_r.html">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">

tests/example_book/_build/html/notebooks/ch02.01.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@
164164
<ul class="nav bd-sidenav">
165165
<li class="toctree-l1"><a class="reference internal" href="code_py.html">Code</a></li>
166166
<li class="toctree-l1"><a class="reference internal" href="code_r.html">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">

tests/example_book/_build/html/notebooks/ch02.02.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@
164164
<ul class="nav bd-sidenav">
165165
<li class="toctree-l1"><a class="reference internal" href="code_py.html">Code</a></li>
166166
<li class="toctree-l1"><a class="reference internal" href="code_r.html">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">

tests/example_book/_build/html/notebooks/code_py.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@
164164
<ul class="current nav bd-sidenav">
165165
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Code</a></li>
166166
<li class="toctree-l1"><a class="reference internal" href="code_r.html">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">

tests/example_book/_build/html/notebooks/code_r.html

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
<script>DOCUMENTATION_OPTIONS.pagename = 'notebooks/code_r';</script>
6565
<link rel="index" title="Index" href="../genindex.html" />
6666
<link rel="search" title="Search" href="../search.html" />
67-
<link rel="next" title="Markup Tests" href="markup.html" />
67+
<link rel="next" title="My Main Title" href="many_a_levels.html" />
6868
<link rel="prev" title="Code" href="code_py.html" />
6969
<meta name="viewport" content="width=device-width, initial-scale=1"/>
7070
<meta name="docsearch:language" content="en"/>
@@ -164,6 +164,11 @@
164164
<ul class="current nav bd-sidenav">
165165
<li class="toctree-l1"><a class="reference internal" href="code_py.html">Code</a></li>
166166
<li class="toctree-l1 current active"><a class="current reference internal" href="#">This Is a Title</a></li>
167+
</ul>
168+
<p aria-level="2" class="caption" role="heading"><span class="caption-text">Edge Cases</span></p>
169+
<ul class="nav bd-sidenav">
170+
<li class="toctree-l1"><a class="reference internal" href="many_a_levels.html">My Main Title</a></li>
171+
167172
</ul>
168173
<p aria-level="2" class="caption" role="heading"><span class="caption-text">References</span></p>
169174
<ul class="nav bd-sidenav">
@@ -449,11 +454,11 @@ <h1>This Is a Title<a class="headerlink" href="#this-is-a-title" title="Permalin
449454
</div>
450455
</a>
451456
<a class="right-next"
452-
href="markup.html"
457+
href="many_a_levels.html"
453458
title="next page">
454459
<div class="prev-next-info">
455460
<p class="prev-next-subtitle">next</p>
456-
<p class="prev-next-title">Markup Tests</p>
461+
<p class="prev-next-title">My Main Title</p>
457462
</div>
458463
<i class="fa-solid fa-angle-right"></i>
459464
</a>

0 commit comments

Comments
 (0)