Skip to content

Commit 9e1d583

Browse files
authored
Merge branch 'main' into dependabot/pip/ipython-8.10.0
2 parents 343adc2 + e47528e commit 9e1d583

File tree

4 files changed

+43
-16
lines changed

4 files changed

+43
-16
lines changed

README.md

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
Takes a Jupyter Book and turns it into an HTMLBook-compliant project for consumption in Atlas, O'Reilly's book building tool. The script runs `jupyter-book` on your book directory (the one containing your *_config.yml* and *_toc.yml* files), and puts HTMLBook files in the specified target directory, updating atlas.json if it's provided.
44

5-
**IMPORTANT**: We're now at 1.0.0, i.e., we have introduced a very breaking-change from the original version of the script!
5+
**IMPORTANT**: We're now at >1.0.0, i.e., we have introduced a major breaking change from the original version of the script!
66

77
## Installation
88

99
**NOTE**: This tool requires Python ^3.9.
1010

11-
It's not on PYPI yet, so install via the GitHub link:
11+
Install via the GitHub link:
1212

1313
```
1414
pip install git+https://github.com/oreillymedia/jupyter-book-to-htmlbook.git
@@ -54,6 +54,8 @@ Options:
5454
--skip-jb-build Skip running `jupyter-book` as a part of
5555
this conversion
5656
--skip-numbering Skip the numbering of In[]/Out[] code cells
57+
--include-root Include the 'root' file of the jupyter-book
58+
project
5759
--version
5860
--install-completion [bash|zsh|fish|powershell|pwsh]
5961
Install completion for the specified shell.
@@ -64,11 +66,9 @@ Options:
6466
6567
```
6668

67-
## Known Limitations
69+
## Current Known Limitations
6870

69-
* Cross references to bare files (e.g., `see [chapter 1](chapter01.ipynb)`) aren't converting as expected; in the meantime please use a heading anchor (e.g., `see [chapter 1](chapter01.ipynb#first-heading)`).
70-
* The `"pagenumrestart"` class is currently applied to the first chapter with parts (assuming that the chapters are numbered); this is a limitation to be overcome later (if there is a single-file chapter 1, a part, etc.).
71-
* Currently, bibliography references are "opinionated," and are meant to follow CMS author-date in terms of in-text citations (no work has been done on the actual *references.html* yet).
71+
* Jupyter Book can only process one metadata-named, code-generated figure per file. The workaround is to save any resulting figures to disk and refer to them as you would any other figure.
7272

7373
## Release Notes
7474

@@ -78,10 +78,12 @@ Features:
7878
- Add support for formal code examples in Python and R via the "example" cell tag
7979
- Add support for glossaries
8080
- Add basic support for bibtex bibliographies
81+
- Align sidebar heading levels with changes in Atlas
8182

8283
Bug fixes:
8384
- Fix bug with top-level heading IDs causing xrefs to fail
8485
- Remove extraneous spacing in figure captions
86+
- Remove epub-breaking attrs (incl. `valign` and `halign` on table cells)
8587

8688
### 1.0.6
8789
- Add support for sidebars as described in the [Jupyter Book documentation](https://jupyterbook.org/en/stable/content/layout.html#sidebars-within-content)

jupyter_book_to_htmlbook/text_processing.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
1-
import re
2-
3-
41
def clean_chapter(chapter, rm_numbering=True):
52
"""
63
"Cleans" the chapter from any script or style tags, removes table borders,
7-
removes any style attrs, and by default removes any section numbering.
4+
table valign/halign/width attributes, removes any style attrs, and by default
5+
removes any section numbering.
86
"""
97
remove_tags = ['style', 'script']
8+
remove_attrs = ['style', 'valign', 'halign', 'width']
9+
1010
all_tags = chapter.find_all()
1111
for tag in all_tags:
1212
if tag.name in remove_tags:
1313
tag.decompose()
1414
if tag.name == 'table':
1515
del tag['border']
16-
for tag in chapter.find_all(attrs={'style': True}):
17-
del tag['style']
16+
17+
for attr in remove_attrs:
18+
for tag in chapter.find_all(attrs={attr: True}):
19+
del tag[attr]
1820

1921
# (optionally) remove numbering
2022
if rm_numbering:
@@ -70,6 +72,6 @@ def process_sidebars(chapter):
7072

7173
if aside.find("p", class_="sidebar-title"):
7274
title = aside.find("p", class_="sidebar-title")
73-
title.name = "h5"
75+
title.name = "h1"
7476

7577
return chapter

tests/test_file_processing.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_process_chapter_single_chapter_file(self, tmp_path, capsys):
5252
# check on return
5353
assert "ch01.html" in result
5454

55-
def test_chapter_promote_headings(self, tmp_path, caplog):
55+
def test_chapter_promote_headings(self, tmp_path):
5656
"""
5757
we expect to have a single h1 and then a bunch of h2s
5858
in a single-file chapter, but we need to promote all the headings
@@ -227,9 +227,10 @@ def test_process_chapter_totally_invalid_file(self, tmp_path, caplog):
227227
<h1>Hello!</h1>
228228
</div>""")
229229
# first item is the intro file, so let's check on the first "chapter"
230+
caplog.set_level(logging.DEBUG)
230231
with pytest.raises(RuntimeError):
231232
process_chapter(tmp_path / 'malformed.html', tmp_path)
232-
assert "Failed to process" in caplog.text
233+
assert "Failed to process" in caplog.text
233234

234235
@pytest.mark.parametrize(
235236
"datatype", [

tests/test_text_processing.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@ def test_chapter_cleans():
4040
</h2>"""
4141

4242

43+
def test_chapter_cleans_table_specific():
44+
"""
45+
A few table-specific edge cases to check, including a no-border table
46+
and tables with valign/halign/width attributes
47+
"""
48+
chapter = BeautifulSoup("""<table>
49+
<tr halign="left">
50+
<th rowspan="2" valign="top">0</th>
51+
<td width="50%">NaN</td>
52+
<td>NaN</td>
53+
<td>NaN</td>
54+
</tr>
55+
</table>""", "html.parser")
56+
result = clean_chapter(chapter)
57+
halign_tr = result.find("tr")
58+
valign_th = result.find("th")
59+
width_td = result.find("td") # it'll find the first
60+
assert not halign_tr.get("halign")
61+
assert not valign_th.get("valign")
62+
assert not width_td.get("width")
63+
64+
4365
def test_move_span_ids_to_sections():
4466
"""
4567
Atlas requires that cross reference targets sections so that
@@ -74,4 +96,4 @@ def test_sidebar_processing():
7496
</aside>""", "html.parser")
7597
process_sidebars(chapter_text)
7698
assert chapter_text.find("aside")["data-type"] == "sidebar"
77-
assert chapter_text.find("h5").string == "Here Is a Sidebar Title"
99+
assert chapter_text.find("h1").string == "Here Is a Sidebar Title"

0 commit comments

Comments
 (0)