Skip to content

Commit e43cb68

Browse files
authored
Merge pull request #38 from oreillymedia/remove-table-attrs
Remove table attrs
2 parents 0e802fd + ec922f9 commit e43cb68

File tree

3 files changed

+33
-8
lines changed

3 files changed

+33
-8
lines changed

jupyter_book_to_htmlbook/text_processing.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
1-
import re
2-
3-
41
def clean_chapter(chapter, rm_numbering=True):
52
"""
63
"Cleans" the chapter from any script or style tags, removes table borders,
7-
removes any style attrs, and by default removes any section numbering.
4+
table valign/width attributes, removes any style attrs, and by default
5+
removes any section numbering.
86
"""
97
remove_tags = ['style', 'script']
8+
remove_attrs = ['style', 'valign', 'halign', 'width']
9+
1010
all_tags = chapter.find_all()
1111
for tag in all_tags:
1212
if tag.name in remove_tags:
1313
tag.decompose()
1414
if tag.name == 'table':
1515
del tag['border']
16-
for tag in chapter.find_all(attrs={'style': True}):
17-
del tag['style']
16+
17+
for attr in remove_attrs:
18+
for tag in chapter.find_all(attrs={attr: True}):
19+
del tag[attr]
1820

1921
# (optionally) remove numbering
2022
if rm_numbering:

tests/test_file_processing.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_process_chapter_single_chapter_file(self, tmp_path, capsys):
5252
# check on return
5353
assert "ch01.html" in result
5454

55-
def test_chapter_promote_headings(self, tmp_path, caplog):
55+
def test_chapter_promote_headings(self, tmp_path):
5656
"""
5757
we expect to have a single h1 and then a bunch of h2s
5858
in a single-file chapter, but we need to promote all the headings
@@ -227,9 +227,10 @@ def test_process_chapter_totally_invalid_file(self, tmp_path, caplog):
227227
<h1>Hello!</h1>
228228
</div>""")
229229
# first item is the intro file, so let's check on the first "chapter"
230+
caplog.set_level(logging.DEBUG)
230231
with pytest.raises(RuntimeError):
231232
process_chapter(tmp_path / 'malformed.html', tmp_path)
232-
assert "Failed to process" in caplog.text
233+
assert "Failed to process" in caplog.text
233234

234235
@pytest.mark.parametrize(
235236
"datatype", [

tests/test_text_processing.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,28 @@ def test_chapter_cleans():
4040
</h2>"""
4141

4242

43+
def test_chapter_cleans_table_specific():
    """
    Table-specific edge cases for clean_chapter: a table with no border
    attribute, plus cells/rows carrying halign, valign, and width attributes,
    each of which is expected to be stripped.
    """
    chapter = BeautifulSoup("""<table>
      <tr halign="left">
        <th rowspan="2" valign="top">0</th>
        <td width="50%">NaN</td>
        <td>NaN</td>
        <td>NaN</td>
      </tr>
    </table>""", "html.parser")
    result = clean_chapter(chapter)
    halign_tr = result.find("tr")
    valign_th = result.find("th")
    # find() returns the first match, which is the <td> carrying width="50%"
    width_td = result.find("td")
    # Each assertion checks the attribute that its element actually carried
    # in the fixture; checking any other attribute would pass trivially.
    assert not halign_tr.get("halign")
    assert not valign_th.get("valign")
    assert not width_td.get("width")
63+
64+
4365
def test_move_span_ids_to_sections():
4466
"""
4567
Atlas requires that cross reference targets sections so that

0 commit comments

Comments
 (0)