Skip to content

Commit ad479c3

Browse files
authored
Merge pull request #50 from oreillymedia/TOOLSDEV-43-version-1-1-0
Toolsdev 43 version 1 1 0
2 parents 2ba56ce + 76187f4 commit ad479c3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+5613
-4756
lines changed

.coveragerc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ exclude_lines =
55
def main
66
app()
77
except ImportError
8+
if build.returncode != 0

README.md

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Usage:`jb2htmlbook [OPTIONS] SOURCE TARGET`
2727
Help text:
2828

2929
```
30+
$ jb2htmlbook --help
3031
Usage: jb2htmlbook [OPTIONS] SOURCE TARGET
3132
3233
Converts a Jupyter Book project into HTMLBook.
@@ -53,29 +54,47 @@ Options:
5354
--atlas-json TEXT Path to the book's atlas.json file
5455
--skip-jb-build Skip running `jupyter-book` as a part of
5556
this conversion
56-
--skip-numbering Skip the numbering of In[]/Out[] code cells
57+
--skip-numbering Skip the numbering of In[]/Out[] code cells
5758
--include-root Include the 'root' file of the jupyter-book
5859
project
60+
--keep-highlighting Preserve any code highlighting provided by
61+
Jupyter Book
5962
--version
6063
--install-completion [bash|zsh|fish|powershell|pwsh]
6164
Install completion for the specified shell.
6265
--show-completion [bash|zsh|fish|powershell|pwsh]
6366
Show completion for the specified shell, to
6467
copy it or customize the installation.
6568
--help Show this message and exit.
66-
6769
```
6870

69-
## Current Known Limitations
71+
## Current (Known) Limitations
7072

7173
* Jupyter Book can only process one metadata-named, code-generated figure per file. The workaround is to save any resulting figures to disk and refer to them as you would any other figure.
7274

7375
## Release Notes
7476

77+
### 1.1.0
78+
79+
Features:
80+
- Upgrade to Jupyter Book v0.15.1
81+
- `--keep-highlighting` flag to preserve syntax highlighting for code blocks provided by Jupyter Book
82+
83+
Bug fixes:
84+
- Table caption numbering is now removed from output (was causing duplicates in Atlas builds)
85+
- Additional safeguards around figure markup
86+
- Generated part.html files now have correctly-numbered (and unique) IDs
87+
88+
Quality of life improvements:
89+
- Explicit tests for common PE tasks
90+
- Additional examples in the `example_book`
91+
- Improved type hints
92+
93+
7594
### 1.0.9
7695

7796
Bug fixes:
78-
- Remove spans inside `<code>` tags that display incorrectly on the ORM learning platform.
97+
- Remove spans inside `<code>` tags that display incorrectly on the ORM learning platform
7998

8099
### 1.0.8
81100

conftest.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import pytest
2+
import shutil
3+
import subprocess
4+
from pathlib import Path
5+
6+
7+
@pytest.fixture(scope="class")
8+
def fresh_book_html(tmp_path_factory):
9+
"""
10+
A fresh Jupyter Book build (if possible) to do some markup checks
11+
"""
12+
test_env = tmp_path_factory.mktemp("example_book_html")
13+
shutil.copytree('tests/example_book', test_env, dirs_exist_ok=True)
14+
# Run `jb build` in the tmp_path
15+
build = subprocess.run(["jb", "build", test_env],
16+
capture_output=True)
17+
18+
if build.returncode != 0:
19+
return Path("tests/example_book/_build/html")
20+
else:
21+
return test_env / "_build/html"
22+
23+
24+
@pytest.fixture()
25+
def tmp_book_path(tmp_path):
26+
"""
27+
Provides a copy of the example_book html jupyter book build
28+
for use in tests.
29+
"""
30+
test_env = tmp_path / 'tmp'
31+
test_env.mkdir()
32+
shutil.copytree('tests/example_book/_build/html',
33+
test_env, dirs_exist_ok=True)
34+
return test_env

jupyter_book_to_htmlbook/code_processing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import re
22
import logging
3+
from typing import Union
34
from bs4 import NavigableString # type: ignore
45
from .helpers import base_soup
56

67

7-
def process_code(chapter, skip_numbering=False):
8+
def process_code(chapter, skip_numbering: Union[bool, None] = False):
89
"""
910
Turn rendered <pre> blocks into appropriately marked-up HTMLBook
1011
"""

jupyter_book_to_htmlbook/figure_processing.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1-
from pathlib import Path
21
from bs4 import NavigableString # type: ignore
32

43

5-
def process_figures(chapter, build_dir: Path):
4+
def process_figures(chapter):
65
"""
76
Takes a chapter soup and handles changing the references to figures
87
to the /images directory per usual htmlbook repo
98
"""
109
figures = chapter.find_all("figure")
1110
for figure in figures:
11+
12+
if figure.parent.name == "p":
13+
figure.parent.unwrap()
14+
1215
# clean anything extraneous, if extant
1316
if figure.find_all('a', class_="headerlink") != []:
1417
for anchor in figure.find_all('a', class_="headerlink"):
@@ -42,13 +45,13 @@ def process_figures(chapter, build_dir: Path):
4245
return chapter
4346

4447

45-
def process_informal_figs(chapter, build_dir=''):
48+
def process_informal_figs(chapter):
4649
"""
4750
This should be run *AFTER* process figs, but basically just repoints the
4851
img tags.
4952
"""
5053
for img in chapter.find_all('img'):
51-
# Since, weirdly, a myst-marked image will be in a floating anchor,
54+
# Since, weirdly, a myst-marked image will be in a floating anchor
5255
if img.parent.name == 'a':
5356
img.parent.name = "figure"
5457
img.parent['class'] = "informal"

jupyter_book_to_htmlbook/file_processing.py

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import re
33
from pathlib import Path
4-
from typing import Union
4+
from typing import Union, Optional, Tuple
55
from bs4 import BeautifulSoup # type: ignore
66
from .admonition_processing import process_admonitions
77
from .figure_processing import process_figures, process_informal_figs
@@ -37,10 +37,12 @@ def process_part(part_path: Path, output_dir: Path):
3737
part_number = info.group(1)
3838
# undo earlier space replacement and do a simple title case
3939
part_name = info.group(2).replace('-', ' ')
40+
# just to make the string fit better
41+
ns = "http://www.w3.org/1999/xhtml"
4042

4143
with open(output_dir / f'part-{part_number}.html', 'wt') as f:
4244
f.write(f"""
43-
<div xmlns="http://www.w3.org/1999/xhtml" data-type="part" id="part-1">
45+
<div xmlns="{ns}" data-type="part" id="part-{part_number}">
4446
<h1>{part_name}</h1>
4547
</div>""".lstrip())
4648
return f'part-{part_number}.html'
@@ -151,7 +153,8 @@ def get_main_section(soup):
151153
return main, bibliography
152154

153155

154-
def process_chapter_soup(toc_element: Union[Path, list[Path]]):
156+
def process_chapter_soup(
157+
toc_element: Union[Path, list[Path]]) -> Tuple[BeautifulSoup, str]:
155158
""" unified file chapter processing """
156159

157160
if isinstance(toc_element, list): # i.e., an ordered list of chapter parts
@@ -169,8 +172,12 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
169172
# perform initial swapping and namespace designation
170173
chapter, bib = get_main_section(base_soup)
171174

172-
if not chapter:
173-
return None, None
175+
if not chapter: # guard against malformed files
176+
logging.warning(f"Failed to process {toc_element}.")
177+
raise RuntimeError(
178+
f"Failed to process {toc_element}. Please check for error in " +
179+
"your source file(s). Contact the Tools team for additional " +
180+
"support.")
174181

175182
else:
176183
chapter['xmlns'] = 'http://www.w3.org/1999/xhtml' # type: ignore
@@ -184,8 +191,8 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
184191
subsection, sub_bib = process_chapter_subparts(subfile)
185192
chapter.append(subsection)
186193
if bib and sub_bib:
187-
entries = sub_bib.find_all("dd")
188-
bib.dl.extend(entries)
194+
entries = sub_bib.find_all("dd") # type: ignore
195+
bib.dl.extend(entries) # type: ignore
189196
elif sub_bib:
190197
bib = sub_bib
191198

@@ -222,7 +229,8 @@ def process_chapter(toc_element,
222229
source_dir,
223230
build_dir=Path('.'),
224231
book_ids: list = [],
225-
skip_cell_numbering: bool = False):
232+
skip_cell_numbering: Optional[bool] = False,
233+
keep_highlighting: Optional[bool] = False):
226234
"""
227235
Takes a list of chapter files and chapter lists and then writes the chapter
228236
to the root directory in which the script is run. Note that this assumes
@@ -232,26 +240,20 @@ def process_chapter(toc_element,
232240
chapter, ch_name = process_chapter_soup(toc_element)
233241
logging.info(f"Processing {ch_name}...")
234242

235-
if not chapter: # guard against malformed files
236-
logging.warning(f"Failed to process {toc_element}.")
237-
raise RuntimeError(
238-
f"Failed to process {toc_element}. Please check for error in " +
239-
"your source file(s). Contact the Tools team for additional " +
240-
"support.")
241-
242243
# perform cleans and processing
243244
chapter = clean_chapter(chapter)
244245
# note: must process figs before xrefs
245-
chapter = process_figures(chapter, build_dir)
246-
chapter = process_informal_figs(chapter, build_dir)
246+
chapter = process_figures(chapter)
247+
chapter = process_informal_figs(chapter)
247248
chapter = process_internal_refs(chapter)
248249
chapter = process_citations(chapter)
249250
chapter = process_footnotes(chapter)
250251
chapter = process_admonitions(chapter)
251252
chapter = process_math(chapter)
252253
# note: best to run examples before code processing
253254
chapter = process_code_examples(chapter)
254-
chapter = process_code(chapter, skip_cell_numbering)
255+
if not keep_highlighting:
256+
chapter = process_code(chapter, skip_cell_numbering)
255257
chapter = process_inline_code(chapter)
256258
chapter = move_span_ids_to_sections(chapter)
257259
chapter = process_sidebars(chapter)

jupyter_book_to_htmlbook/footnote_processing.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ def process_footnotes(chapter):
1212
try:
1313
ref_id = ref.get('href').split('#')[-1]
1414
# double next_sibling b/c next sibling is a space
15-
ref_location = chapter.find(
16-
"dt", {"id": ref_id}
17-
).next_sibling.next_sibling
15+
ref_location = chapter.find(id=ref_id).next_sibling.next_sibling
1816
footnote_contents = ref_location.find('p').children
1917
ref.name = 'span'
2018
ref['data-type'] = 'footnote'

jupyter_book_to_htmlbook/main.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ def jupter_book_to_htmlbook(
4343
False,
4444
"--include-root",
4545
help="Include the 'root' file of the jupyter-book project"),
46+
keep_highlighting: Optional[bool] = typer.Option(
47+
False,
48+
"--keep-highlighting",
49+
help="Preserve any code highlighting provided by Jupyter Book",
50+
),
4651
version: Optional[bool] = typer.Option(
4752
None,
4853
"--version",
@@ -132,7 +137,8 @@ def jupter_book_to_htmlbook(
132137
source_dir,
133138
output_dir,
134139
book_ids,
135-
skip_cell_numbering)
140+
skip_cell_numbering,
141+
keep_highlighting)
136142
processed_files.append(f'{target}/{file}')
137143
book_ids.extend(chapter_ids)
138144

jupyter_book_to_htmlbook/text_processing.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
def clean_chapter(chapter, rm_numbering=True):
22
"""
33
"Cleans" the chapter from any script or style tags, removes table borders,
4-
table valign/width attributes, removes any style attrs, and by default
5-
removes any section numbering.
4+
table valign/width attributes, caption numbering, removes any style attrs,
5+
and by default removes any section numbering.
66
"""
77
remove_tags = ['style', 'script']
88
remove_attrs = ['style', 'valign', 'halign', 'width']
@@ -13,6 +13,8 @@ def clean_chapter(chapter, rm_numbering=True):
1313
tag.decompose()
1414
if tag.name == 'table':
1515
del tag['border']
16+
if tag.find("span", class_="caption-number"):
17+
tag.find("span", class_="caption-number").decompose()
1618

1719
for attr in remove_attrs:
1820
for tag in chapter.find_all(attrs={attr: True}):
@@ -27,8 +29,8 @@ def clean_chapter(chapter, rm_numbering=True):
2729
# remove hidden cells. in the web version, these cells are hidden by
2830
# default and users can toggle them on/off. but they take up too much
2931
# space if rendered into the pdf.
30-
".tag_hide-input > .cell_input",
31-
".tag_hide-output > .cell_output",
32+
".tag_hide-input > .hide",
33+
".tag_hide-output > .hide",
3234
".tag_hide-cell",
3335
".toggle-details",
3436

0 commit comments

Comments
 (0)