Merge branch 'main' into dependabot/pip/ipython-8.10.0

delfanbaum · web-flow · commit 9e1d583d7d56 · 2023-03-10T12:48:20.000-06:00
diff --git a/README.md b/README.md
@@ -2,13 +2,13 @@
 
 Takes a Jupyter Book and turns it into an HTMLBook-compliant project for consumption in Atlas, O'Reilly's book building tool. The script runs `jupyter-book` on your book directory (the one containing your *_config.yml* and *_toc.yml* files), and puts HTMLBook files in the specified target directory, updating atlas.json if it's provided.
 
-**IMPORTANT**: We're now at 1.0.0, i.e., we have introduced a very breaking-change from the original version of the script! 
+**IMPORTANT**: We're now past 1.0.0, i.e., we have introduced a major breaking change from the original version of the script!
 
 ## Installation
 
 **NOTE**: This tool requires Python ^3.9.
 
-It's not on PYPI yet, so install via the GitHub link:
+Install via the GitHub link:
 
 ```
 pip install git+https://github.com/oreillymedia/jupyter-book-to-htmlbook.git
@@ -54,6 +54,8 @@ Options:
   --skip-jb-build                 Skip running `jupyter-book` as a part of
                                   this conversion
   --skip-numbering      Skip the numbering of In[]/Out[] code cells
+  --include-root                  Include the 'root' file of the jupyter-book
+                                  project
   --version
   --install-completion [bash|zsh|fish|powershell|pwsh]
                                   Install completion for the specified shell.
@@ -64,11 +66,9 @@ Options:
 
 ```
 
-## Known Limitations
+## Current Known Limitations
 
-* Cross references to bare files (e.g., `see [chapter 1](chapter01.ipynb)`) aren't converting as expected; in the meantime please use a heading anchor (e.g., `see [chapter 1](chapter01.ipynb#first-heading)`).
-* The `"pagenumrestart"` class is currently applied to the first chapter with parts (assuming that the chapters are numbered); this is a limitation to be overcome later (if there is a single-file chapter 1, a part, etc.).
-* Currently, bibliography references are "opinionated," and are meant to follow CMS author-date in terms of in-text citations (no work has been done on the actual *references.html* yet).
+* Jupyter Book can only process one metadata-named, code-generated figure per file. The workaround is to save any resulting figures to disk and refer to them as you would any other figure.
 
 ## Release Notes
 
@@ -78,10 +78,12 @@ Features:
 - Add support for formal code examples in Python and R via the "example" cell tag
 - Add support for glossaries
 - Add basic support for bibtex bibliographies
+- Align sidebar heading levels with changes in Atlas
 
 Bug fixes:
 - Fix bug with top-level heading IDs causing xrefs to fail
 - Remove extraneous spacing in figure captions
+- Remove epub-breaking attrs (incl. `valign` and `halign` on table cells)
 
 ### 1.0.6
 - Add support for sidebars as described in the [Jupyter Book documentation](https://jupyterbook.org/en/stable/content/layout.html#sidebars-within-content)
diff --git a/jupyter_book_to_htmlbook/text_processing.py b/jupyter_book_to_htmlbook/text_processing.py
@@ -1,20 +1,22 @@
-import re
-
-
 def clean_chapter(chapter, rm_numbering=True):
     """
     "Cleans" the chapter from any script or style tags, removes table borders,
-    removes any style attrs, and by default removes any section numbering.
+    table valign/width attributes, removes any style attrs, and by default
+    removes any section numbering.
     """
     remove_tags = ['style', 'script']
+    remove_attrs = ['style', 'valign', 'halign', 'width']
+
     all_tags = chapter.find_all()
     for tag in all_tags:
         if tag.name in remove_tags:
             tag.decompose()
         if tag.name == 'table':
             del tag['border']
-    for tag in chapter.find_all(attrs={'style': True}):
-        del tag['style']
+
+    for attr in remove_attrs:
+        for tag in chapter.find_all(attrs={attr: True}):
+            del tag[attr]
 
     # (optionally) remove numbering
     if rm_numbering:
@@ -70,6 +72,6 @@ def process_sidebars(chapter):
 
         if aside.find("p", class_="sidebar-title"):
             title = aside.find("p", class_="sidebar-title")
-            title.name = "h5"
+            title.name = "h1"
 
     return chapter
diff --git a/tests/test_file_processing.py b/tests/test_file_processing.py
@@ -52,7 +52,7 @@ def test_process_chapter_single_chapter_file(self, tmp_path, capsys):
         # check on return
         assert "ch01.html" in result
 
-    def test_chapter_promote_headings(self, tmp_path, caplog):
+    def test_chapter_promote_headings(self, tmp_path):
         """
         we expect to have a single h1 and then a bunch of h2s
         in a single-file chapter, but we need to promote all the headings
@@ -227,9 +227,10 @@ def test_process_chapter_totally_invalid_file(self, tmp_path, caplog):
     <h1>Hello!</h1>
 </div>""")
         # first item is the intro file, so let's check on the first "chapter"
+        caplog.set_level(logging.DEBUG)
         with pytest.raises(RuntimeError):
             process_chapter(tmp_path / 'malformed.html', tmp_path)
-            assert "Failed to process" in caplog.text
+        assert "Failed to process" in caplog.text
 
     @pytest.mark.parametrize(
             "datatype", [
diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py
@@ -40,6 +40,28 @@ def test_chapter_cleans():
 </h2>"""
 
 
+def test_chapter_cleans_table_specific():
+    """
+    Table-specific edge cases: cleaning a table that has no border attribute,
+    and stripping the halign/valign/width attributes from rows and cells
+    """
+    chapter = BeautifulSoup("""<table>
+<tr halign="left">
+<th rowspan="2" valign="top">0</th>
+<td width="50%">NaN</td>
+<td>NaN</td>
+<td>NaN</td>
+</tr>
+</table>""", "html.parser")
+    result = clean_chapter(chapter)
+    halign_tr = result.find("tr")
+    valign_th = result.find("th")
+    width_td = result.find("td")  # find() returns the first <td>, the one with width
+    assert not halign_tr.get("halign")
+    assert not valign_th.get("valign")
+    assert not width_td.get("width")
+
+
 def test_move_span_ids_to_sections():
     """
     Atlas requires that cross reference targets sections so that
@@ -74,4 +96,4 @@ def test_sidebar_processing():
 </aside>""", "html.parser")
     process_sidebars(chapter_text)
     assert chapter_text.find("aside")["data-type"] == "sidebar"
-    assert chapter_text.find("h5").string == "Here Is a Sidebar Title"
+    assert chapter_text.find("h1").string == "Here Is a Sidebar Title"