oreillymedia
diff --git a/‎.coveragerc‎
Lines changed: 1 addition & 0 deletions b/‎.coveragerc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 23 additions & 4 deletions b/‎README.md‎
Lines changed: 23 additions & 4 deletions
diff --git a/‎conftest.py‎
Lines changed: 34 additions & 0 deletions b/‎conftest.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎jupyter_book_to_htmlbook/code_processing.py‎
Lines changed: 2 additions & 1 deletion b/‎jupyter_book_to_htmlbook/code_processing.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎jupyter_book_to_htmlbook/figure_processing.py‎
Lines changed: 7 additions & 4 deletions b/‎jupyter_book_to_htmlbook/figure_processing.py‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎jupyter_book_to_htmlbook/file_processing.py‎
Lines changed: 20 additions & 18 deletions b/‎jupyter_book_to_htmlbook/file_processing.py‎
Lines changed: 20 additions & 18 deletions
diff --git a/‎jupyter_book_to_htmlbook/footnote_processing.py‎
Lines changed: 1 addition & 3 deletions b/‎jupyter_book_to_htmlbook/footnote_processing.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎jupyter_book_to_htmlbook/main.py‎
Lines changed: 7 additions & 1 deletion b/‎jupyter_book_to_htmlbook/main.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎jupyter_book_to_htmlbook/text_processing.py‎
Lines changed: 6 additions & 4 deletions b/‎jupyter_book_to_htmlbook/text_processing.py‎
Lines changed: 6 additions & 4 deletions
@@ -5,3 +5,4 @@ exclude_lines =
     def main
     app()
     except ImportError
+    if build.returncode != 0
@@ -27,6 +27,7 @@ Usage:`jb2htmlbook [OPTIONS] SOURCE TARGET`
 Help text: 
 
 ```
+$ jb2htmlbook --help
 Usage: jb2htmlbook [OPTIONS] SOURCE TARGET
 
   Converts a Jupyter Book project into HTMLBook.
@@ -53,29 +54,47 @@ Options:
   --atlas-json TEXT               Path to the book's atlas.json file
   --skip-jb-build                 Skip running `jupyter-book` as a part of
                                   this conversion
-  --skip-numbering      Skip the numbering of In[]/Out[] code cells
+  --skip-numbering                Skip the numbering of In[]/Out[] code cells
   --include-root                  Include the 'root' file of the jupyter-book
                                   project
+  --keep-highlighting             Preserve any code highlighting provided by
+                                  Jupyter Book
   --version
   --install-completion [bash|zsh|fish|powershell|pwsh]
                                   Install completion for the specified shell.
   --show-completion [bash|zsh|fish|powershell|pwsh]
                                   Show completion for the specified shell, to
                                   copy it or customize the installation.
   --help                          Show this message and exit.
-
 ```
 
-## Current Known Limitations
+## Current (Known) Limitations
 
 * Jupyter Book can only process one metadata-named code-generated figure per file. The workaround for this is to save any resultant figures to disk and refer to them as any other figure.
 
 ## Release Notes
 
+### 1.1.0
+
+Features:
+- Upgrade to Jupyter Book v.0.15.1
+- `--keep-highlighting` flag to preserve syntax highlighting for code blocks provided by Jupyter Book
+
+Bug fixes:
+- Table caption numbering is now removed from output (was causing duplicates in Atlas builds)
+- Additional safeguards around figure markup
+- Generated part.html files now have correctly-numbered (and unique) IDs
+
+Quality of life improvements:
+- Explicit tests for common PE tasks
+- Additional examples in the `example_book`
+- Improved type hints
+
+
 ### 1.0.9
 
 Bug fixes:
-- Remove spans inside `<code>` tags that display incorrectly on the ORM learning platform.
+- Remove spans inside `<code>` tags that display incorrectly on the ORM learning platform
 
 ### 1.0.8
 
 
@@ -0,0 +1,34 @@
+import pytest
+import shutil
+import subprocess
+from pathlib import Path
+
+
+@pytest.fixture(scope="class")
+def fresh_book_html(tmp_path_factory):
+    """
+    A fresh Jupyter Book build (if possible) to do some markup checks
+    """
+    test_env = tmp_path_factory.mktemp("example_book_html")
+    shutil.copytree('tests/example_book', test_env, dirs_exist_ok=True)
+    # Run `jb build` in the tmp_path
+    build = subprocess.run(["jb", "build", test_env],
+                           capture_output=True)
+
+    if build.returncode != 0:
+        return Path("tests/example_book/_build/html")
+    else:
+        return test_env / "_build/html"
+
+
+@pytest.fixture()
+def tmp_book_path(tmp_path):
+    """
+    Provides a copy of the example_book html jupyter book build
+    for use in tests.
+    """
+    test_env = tmp_path / 'tmp'
+    test_env.mkdir()
+    shutil.copytree('tests/example_book/_build/html',
+                    test_env, dirs_exist_ok=True)
+    return test_env
@@ -1,10 +1,11 @@
 import re
 import logging
+from typing import Union
 from bs4 import NavigableString  # type: ignore
 from .helpers import base_soup
 
 
-def process_code(chapter, skip_numbering=False):
+def process_code(chapter, skip_numbering: Union[bool, None] = False):
     """
     Turn rendered <pre> blocks into appropriately marked-up HTMLBook
     """
 
@@ -1,14 +1,17 @@
-from pathlib import Path
 from bs4 import NavigableString  # type: ignore
 
 
-def process_figures(chapter, build_dir: Path):
+def process_figures(chapter):
     """
     Takes a chapter soup and handles changing the references to figures
     to the /images directory per usual htmlbook repo
     """
     figures = chapter.find_all("figure")
     for figure in figures:
+
+        if figure.parent.name == "p":
+            figure.parent.unwrap()
+
         # clean anything extraneous, if extant
         if figure.find_all('a', class_="headerlink") != []:
             for anchor in figure.find_all('a', class_="headerlink"):
@@ -42,13 +45,13 @@ def process_figures(chapter, build_dir: Path):
     return chapter
 
 
-def process_informal_figs(chapter, build_dir=''):
+def process_informal_figs(chapter):
     """
     This should be run *AFTER* process figs, but basically just repoints the
     img tags.
     """
     for img in chapter.find_all('img'):
-        # Since, weirdly, a myst-marked image will be in a floating anchor,
+        # Since, weirdly, a myst-marked image will be in a floating anchor
         if img.parent.name == 'a':
             img.parent.name = "figure"
             img.parent['class'] = "informal"
 
@@ -1,7 +1,7 @@
 import logging
 import re
 from pathlib import Path
-from typing import Union
+from typing import Union, Optional, Tuple
 from bs4 import BeautifulSoup  # type: ignore
 from .admonition_processing import process_admonitions
 from .figure_processing import process_figures, process_informal_figs
@@ -37,10 +37,12 @@ def process_part(part_path: Path, output_dir: Path):
         part_number = info.group(1)
         # undo earlier space replacement and do a simple title case
         part_name = info.group(2).replace('-', ' ')
+        # just to make the string fit better
+        ns = "http://www.w3.org/1999/xhtml"
 
         with open(output_dir / f'part-{part_number}.html', 'wt') as f:
             f.write(f"""
-<div xmlns="http://www.w3.org/1999/xhtml" data-type="part" id="part-1">
+<div xmlns="{ns}" data-type="part" id="part-{part_number}">
 <h1>{part_name}</h1>
 </div>""".lstrip())
         return f'part-{part_number}.html'
@@ -151,7 +153,8 @@ def get_main_section(soup):
     return main, bibliography
 
 
-def process_chapter_soup(toc_element: Union[Path, list[Path]]):
+def process_chapter_soup(
+        toc_element: Union[Path, list[Path]]) -> Tuple[BeautifulSoup, str]:
     """ unified file chapter processing """
 
     if isinstance(toc_element, list):  # i.e., an ordered list of chapter parts
@@ -169,8 +172,12 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
     # perform initial swapping and namespace designation
     chapter, bib = get_main_section(base_soup)
 
-    if not chapter:
-        return None, None
+    if not chapter:  # guard against malformed files
+        logging.warning(f"Failed to process {toc_element}.")
+        raise RuntimeError(
+            f"Failed to process {toc_element}. Please check for error in " +
+            "your source file(s). Contact the Tools team for additional " +
+            "support.")
 
     else:
         chapter['xmlns'] = 'http://www.w3.org/1999/xhtml'  # type: ignore
@@ -184,8 +191,8 @@ def process_chapter_soup(toc_element: Union[Path, list[Path]]):
                 subsection, sub_bib = process_chapter_subparts(subfile)
                 chapter.append(subsection)
                 if bib and sub_bib:
-                    entries = sub_bib.find_all("dd")
-                    bib.dl.extend(entries)
+                    entries = sub_bib.find_all("dd")  # type: ignore
+                    bib.dl.extend(entries)  # type: ignore
                 elif sub_bib:
                     bib = sub_bib
 
@@ -222,7 +229,8 @@ def process_chapter(toc_element,
                     source_dir,
                     build_dir=Path('.'),
                     book_ids: list = [],
-                    skip_cell_numbering: bool = False):
+                    skip_cell_numbering: Optional[bool] = False,
+                    keep_highlighting: Optional[bool] = False):
     """
     Takes a list of chapter files and chapter lists and then writes the chapter
     to the root directory in which the script is run. Note that this assumes
@@ -232,26 +240,20 @@ def process_chapter(toc_element,
     chapter, ch_name = process_chapter_soup(toc_element)
     logging.info(f"Processing {ch_name}...")
 
-    if not chapter:  # guard against malformed files
-        logging.warning(f"Failed to process {toc_element}.")
-        raise RuntimeError(
-            f"Failed to process {toc_element}. Please check for error in " +
-            "your source file(s). Contact the Tools team for additional " +
-            "support.")
-
     # perform cleans and processing
     chapter = clean_chapter(chapter)
     # note: must process figs before xrefs
-    chapter = process_figures(chapter, build_dir)
-    chapter = process_informal_figs(chapter, build_dir)
+    chapter = process_figures(chapter)
+    chapter = process_informal_figs(chapter)
     chapter = process_internal_refs(chapter)
     chapter = process_citations(chapter)
     chapter = process_footnotes(chapter)
     chapter = process_admonitions(chapter)
     chapter = process_math(chapter)
     # note: best to run examples before code processing
     chapter = process_code_examples(chapter)
-    chapter = process_code(chapter, skip_cell_numbering)
+    if not keep_highlighting:
+        chapter = process_code(chapter, skip_cell_numbering)
     chapter = process_inline_code(chapter)
     chapter = move_span_ids_to_sections(chapter)
     chapter = process_sidebars(chapter)
 
@@ -12,9 +12,7 @@ def process_footnotes(chapter):
         try:
             ref_id = ref.get('href').split('#')[-1]
             # double next_sibling b/c next sibling is a space
-            ref_location = chapter.find(
-                                        "dt", {"id": ref_id}
-                                        ).next_sibling.next_sibling
+            ref_location = chapter.find(id=ref_id).next_sibling.next_sibling
             footnote_contents = ref_location.find('p').children
             ref.name = 'span'
             ref['data-type'] = 'footnote'
 
@@ -43,6 +43,11 @@ def jupter_book_to_htmlbook(
             False,
             "--include-root",
             help="Include the 'root' file of the jupyter-book project"),
+        keep_highlighting: Optional[bool] = typer.Option(
+            False,
+            "--keep-highlighting",
+            help="Preserve any code highlighting provided by Jupyter Book",
+            ),
         version: Optional[bool] = typer.Option(
             None,
             "--version",
@@ -132,7 +137,8 @@ def jupter_book_to_htmlbook(
                                                 source_dir,
                                                 output_dir,
                                                 book_ids,
-                                                skip_cell_numbering)
+                                                skip_cell_numbering,
+                                                keep_highlighting)
             processed_files.append(f'{target}/{file}')
             book_ids.extend(chapter_ids)
 
 
@@ -1,8 +1,8 @@
 def clean_chapter(chapter, rm_numbering=True):
     """
     "Cleans" the chapter from any script or style tags, removes table borders,
-    table valign/width attributes, removes any style attrs, and by default
-    removes any section numbering.
+    table valign/width attributes, caption numbering, removes any style attrs,
+    and by default removes any section numbering.
     """
     remove_tags = ['style', 'script']
     remove_attrs = ['style', 'valign', 'halign', 'width']
@@ -13,6 +13,8 @@ def clean_chapter(chapter, rm_numbering=True):
             tag.decompose()
         if tag.name == 'table':
             del tag['border']
+            if tag.find("span", class_="caption-number"):
+                tag.find("span", class_="caption-number").decompose()
 
     for attr in remove_attrs:
         for tag in chapter.find_all(attrs={attr: True}):
@@ -27,8 +29,8 @@ def clean_chapter(chapter, rm_numbering=True):
         # remove hidden cells. in the web version, these cells are hidden by
         # default and users can toggle them on/off. but they take up too much
         # space if rendered into the pdf.
-        ".tag_hide-input > .cell_input",
-        ".tag_hide-output > .cell_output",
+        ".tag_hide-input > .hide",
+        ".tag_hide-output > .hide",
         ".tag_hide-cell",
         ".toggle-details",