diff --git a/jupyter_book_to_htmlbook/file_processing.py b/jupyter_book_to_htmlbook/file_processing.py index 91d93c3..c755db0 100644 --- a/jupyter_book_to_htmlbook/file_processing.py +++ b/jupyter_book_to_htmlbook/file_processing.py @@ -141,10 +141,15 @@ def get_top_level_sections(soup): all but bibliography sections """ section_wrappers = soup.find_all("article", attrs={"role": "main"}) + top_level_sections = [] # test case for partial files, not expected in production if len(section_wrappers) == 0: sections = soup.find_all('section') + + for section in sections: + if section.find_parent('section') is None: + top_level_sections.append(section) elif len(section_wrappers) != 1: article = soup.find('article', attrs={"role": "main"}) try: @@ -156,16 +161,15 @@ def get_top_level_sections(soup): return None, None else: main = section_wrappers[0] - sections = [] for element in main.children: if ( element.name == "section" and element.get('id') != "bibliography" ): - sections.append(element) + top_level_sections.append(element) - return sections + return top_level_sections def get_main_section(soup): diff --git a/tests/example_book/_build/html/no_wrapper.html b/tests/example_book/_build/html/no_wrapper.html new file mode 100644 index 0000000..0149cee --- /dev/null +++ b/tests/example_book/_build/html/no_wrapper.html @@ -0,0 +1,438 @@ + + + + + + + + + + + + Chapter Title — My Jupyter Book + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + +
+ +
+ + + +
+ +
+
+ +
+
+ +
+ +
+ +
+ + +
+ +
+ +
+ + + + + + + + + + + + + + + + + +
+ +
+ +
+
+ + + + + + + + +
+ +
+

Chapter Title#

+

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin vehicula mauris nulla, id finibus orci accumsan placerat. Mauris rhoncus ex in ipsum pellentesque, sed blandit arcu scelerisque. Aenean ut malesuada ligula. Mauris porttitor finibus nunc in dignissim. Phasellus sed diam gravida, aliquet nisi quis, placerat sapien. In augue lacus, egestas accumsan vulputate non, gravida at ex. Vivamus malesuada lectus nec blandit ultricies. Duis feugiat tortor vitae velit accumsan, eu imperdiet purus mattis. Fusce eu viverra arcu. Duis nec ex varius, tristique turpis ut, blandit sapien. Quisque non pellentesque nulla, vitae varius arcu. In maximus auctor pretium. Nunc elementum ac diam ut lobortis. Aliquam nec sapien eu ligula commodo aliquam. Integer in molestie libero, dignissim rhoncus lorem. Donec a laoreet magna.

+
+

First First-Level Subsection Heading#

+

Sed nec vehicula nibh. Praesent vitae commodo lacus, sed vestibulum lacus. In egestas felis faucibus ultrices dapibus. Sed consectetur quam et leo tristique convallis. Nulla a diam libero. Vestibulum sit amet neque imperdiet, dictum urna sed, vehicula dolor. Integer tincidunt vulputate ex, at cursus tortor molestie sed. Maecenas at posuere justo. Nunc massa erat, rutrum id volutpat gravida, rhoncus in quam. Phasellus ex purus, consectetur nec dui ut, ultrices porta tortor.

+
+
+

Second First-Level Subsection Heading#

+

Since 1973 the U.S. Centers for Disease Control and Prevention (CDC) have conducted the National Survey of Family Growth (NSFG), which is intended to gather “information on family life, marriage and divorce, pregnancy, infertility, use of contraception, and men’s and women’s health. The survey results are used…to plan health services and health education programs, and to do statistical studies of families, fertility, and health.”

+
+
+

Third First-Level Subsection Heading#

+

In malesuada sagittis libero ut bibendum. Sed dolor lacus, placerat eu ultricies nec, sollicitudin imperdiet sem. Vestibulum condimentum est a lacinia suscipit. Fusce vitae lacus et magna sollicitudin pulvinar. Morbi erat ipsum, vehicula sed orci et, rutrum hendrerit justo. Phasellus porttitor elementum mattis. Ut et commodo ligula.

+
+
+ + + + +
+ + + + +
+ +
+ +
+
+
+ +
+ +
+ +
+ + + + + + +
+ + + +
+
+
+ + + + + + + + \ No newline at end of file diff --git a/tests/test_file_processing.py b/tests/test_file_processing.py index 35c52f2..0a61ac5 100644 --- a/tests/test_file_processing.py +++ b/tests/test_file_processing.py @@ -64,6 +64,28 @@ def test_process_chapter_single_chapter_file(self, tmp_book_path): # check on return assert "ch01.html" in result + def test_process_chapter_with_no_main_wrapper(self, tmp_book_path): + """ + ensure a chapter with no main wrapper but with a single + top-level section can be processed + + observed edge cases: + article wrapper present but has no role attr with value of main + wrapper element is of type main rather than article + """ + test_env = tmp_book_path + test_out = test_env / 'output' + test_out.mkdir() + + process_chapter((test_env / 'no_wrapper.html'), test_env, test_out) + + with open(test_out / 'no_wrapper.html') as f: + text = f.read() + assert 'section data-type="chapter" id="chapter-1"' in text + assert 'data-type="sect1" id="section-1"' in text + assert 'data-type="sect1" id="section-2"' in text + assert 'data-type="sect1" id="section-3"' in text + def test_process_chapter_single_file_with_multiple_h1s(self, tmp_book_path, caplog,