Skip to content

Commit a03fa73

Browse files
Massive changes to table handling to allow for targets other than HTML.
Signed-off-by: Kevin Putnam <[email protected]>
1 parent 20932ff commit a03fa73

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed

sphinx_md/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import sphinx
6+
from .convert_html import html2Docutils
67

78
from docutils import nodes
89
from os.path import isdir, isfile, join, basename, dirname
@@ -153,6 +154,8 @@ def setup(app):
153154
app.add_config_value('sphinx_md_useGitHubURL',False,'')
154155
app.add_config_value('sphinx_md_githubFileURL','','')
155156
app.add_config_value('sphinx_md_githubDirURL','','')
157+
app.add_config_value('sphinx_md_tableIDs',{},'')
158+
app.connect('doctree-read',html2Docutils)
156159
app.connect('doctree-resolved',fixLocalMDAnchors)
157160
app.connect('missing-reference',fixRSTLinkInMD)
158161

sphinx_md/convert_html.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
from bs4 import BeautifulSoup, Tag
2+
from docutils import nodes
3+
import os.path
4+
5+
# Tag names that qualify a raw node for conversion: html2Docutils only
# converts a raw node when the first tag parsed from it is one of these.
known_start_tags = ['p','img','table']
6+
7+
def html2Docutils(app, doctree):
    """Replace raw HTML nodes in *doctree* with converted docutils nodes.

    Handler for the ``doctree-read`` event. Every ``nodes.raw`` node whose
    first parsed tag is listed in ``known_start_tags`` is converted by
    ``convertHTML`` into a ``nodes.container`` that takes the raw node's
    place in the tree. All other raw nodes are left untouched.

    Args:
        app: the Sphinx application object, forwarded to ``convertHTML``.
        doctree: the document tree being read; ``doctree['source']`` is
            used as the path of the file being processed.
    """
    filepath = doctree['source']
    # Snapshot the raw nodes up front: the loop body replaces nodes in the
    # very tree that is being walked.
    for node in list(doctree.traverse(nodes.raw)):
        soup = BeautifulSoup(node.astext(), features="html.parser")
        firstTag = soup.find()
        # find() returns None when the raw content contains no tag at all;
        # such nodes cannot be converted and are skipped.
        if firstTag is None or firstTag.name not in known_start_tags:
            continue
        div = nodes.container()
        convertHTML(soup, div, app, filepath)
        # Swap the raw node for the converted subtree.
        node.parent.replace(node, div)
21+
22+
def convertHTML(soup, parent, app, filepath):
    """Recursively convert parsed HTML into docutils nodes under *parent*.

    Each child of *soup* is mapped to a docutils node which is appended to
    *parent*; the function then recurses into that child with the new node
    as the parent. Handled tags are ``table``, ``p``, ``img``, ``thead``,
    ``tbody``, ``tr``, ``th`` and ``td``. Any other tag is dropped together
    with its content. Text is kept only when *parent* is an entry, a
    paragraph or an image node.

    Args:
        soup: BeautifulSoup object or tag whose children are converted.
            Objects without a ``children`` attribute are ignored.
        parent: docutils node that receives the converted children.
        app: Sphinx application; ``app.env`` provides the source directory,
            the image registry and the ``sphinx_md_tableIDs`` counters.
        filepath: path of the source document being processed, used to
            number tables per file and to resolve image paths.
    """
    if not hasattr(soup, "children"):
        return

    # Tags that map one-to-one onto an empty docutils container node.
    simpleNodes = {
        "p": nodes.paragraph,
        "thead": nodes.thead,
        "tbody": nodes.tbody,
        "tr": nodes.row,
        "th": nodes.entry,
        "td": nodes.entry,
    }

    for child in soup.children:
        node = None
        tagName = child.name

        if tagName is None:
            # Plain text: only meaningful inside these node types.
            if isinstance(parent, (nodes.entry, nodes.paragraph, nodes.image)):
                node = nodes.Text(child)
                parent += node

        elif tagName == "table":
            # Per-file table counter, keyed by the actual source path so the
            # membership test and the stored key agree.
            tableIDs = app.env.config.sphinx_md_tableIDs
            if filepath not in tableIDs:
                tableIDs[filepath] = 0
            else:
                tableIDs[filepath] += 1

            tNode = nodes.table()
            tNode['ids'].append("id" + str(tableIDs[filepath]))

            # The tgroup becomes the parent for the table's thead/tbody.
            node = nodes.tgroup()
            ncols = getNumCols(child)
            node['cols'] = ncols
            for _ in range(ncols):
                colspecNode = nodes.colspec()
                colspecNode["colwidth"] = 1
                node += colspecNode

            tNode += nodes.title()
            tNode += node
            parent += tNode

        elif tagName == "img":
            node = nodes.image()
            imgPath = ""
            if "alt" in child.attrs:
                node["alt"] = child.attrs['alt']
            if "src" in child.attrs:
                # Document path relative to the source directory. relpath
                # normalises its start argument, so no trailing separator is
                # needed and srcdir may be a str or a path-like object.
                docRelPath = os.path.relpath(filepath, app.env.srcdir)
                docfilename = os.path.splitext(docRelPath)[0]
                imgPath = os.path.join(os.path.dirname(docRelPath),
                                       child.attrs['src'])
                node["uri"] = imgPath
                print("Checking for file.")
                # NOTE(review): imgPath is relative to the source directory,
                # so this check only succeeds when the build runs from
                # there -- confirm that is intended.
                if os.path.isfile(imgPath):
                    if imgPath not in app.env.images:
                        imageFileName = os.path.basename(imgPath)
                        app.env.images[imgPath] = ({docfilename}, imageFileName)
            if "width" in child.attrs:
                node["width"] = child.attrs['width']
            if "height" in child.attrs:
                node["height"] = child.attrs['height']
            # Sphinx reads ``candidates`` as a mapping from MIME type (or
            # '*' for "any") to image URI, so store a real dict rather than
            # its string representation.
            node["candidates"] = {'*': imgPath}
            parent += node

        elif tagName in simpleNodes:
            node = simpleNodes[tagName]()
            parent += node

        # Descend only when this child produced a node to hang things on.
        if node is not None:
            convertHTML(child, node, app, filepath)
97+
98+
def removeHTMLAttributes(soup,tagName):
    """Strip every attribute from all *tagName* elements found in *soup*.

    Returns the same *soup* object, modified in place.
    """
    for element in soup.find_all(tagName):
        # Iterate over a copy of the names: deleting changes element.attrs.
        for attrName in list(element.attrs):
            del element[attrName]
    return soup
107+
108+
def replaceTag(soup,oldTag,newTag,delAttrs=True):
    """Rename every *oldTag* element in *soup* to *newTag*.

    When *delAttrs* is true (the default) all attributes of the renamed
    elements are removed as well. Returns the same *soup* object, modified
    in place.
    """
    for element in soup.find_all(oldTag):
        element.name = newTag
        if not delAttrs:
            continue
        # Iterate over a copy of the names: deleting changes element.attrs.
        for attrName in list(element.attrs):
            del element[attrName]
    return soup
119+
120+
def fixImages(soup):
    """Rewrite every ``img`` element in *soup* as an ``image`` element.

    For each image the tag is renamed to ``image``, its ``src`` attribute is
    moved to ``href``, and its ``alt`` attribute is turned into a child
    ``alt`` element holding the alternative text. An attribute that is
    absent is simply skipped, so images without ``alt`` or ``src`` no
    longer raise ``KeyError``.

    Returns the same *soup* object, modified in place.
    """
    for imgTag in soup.find_all('img'):
        imgTag.name = "image"
        if 'src' in imgTag.attrs:
            imgTag['href'] = imgTag['src']
            del imgTag['src']
        if 'alt' in imgTag.attrs:
            # Only create the child element when there is text to put in it.
            altTag = soup.new_tag("alt")
            altTag.string = imgTag['alt']
            del imgTag['alt']
            imgTag.append(altTag)
    return soup
131+
132+
133+
def addTGroup(soup):
    """Turn every ``table`` element in *soup* into ``table > tgroup``.

    Each existing ``table`` tag is renamed to ``tgroup``, given a ``cols``
    attribute, stripped of its ``class`` attribute and wrapped in a freshly
    created ``table`` tag.

    The column count is computed per table (from that table's own ``th``
    cells) rather than once for the whole soup, so several tables in one
    soup each get their own count.

    Returns the same *soup* object, modified in place.
    """
    for tableTag in soup.find_all('table'):
        numCols = getNumCols(tableTag)
        tableTag.name = 'tgroup'
        tableTag['cols'] = numCols
        del tableTag['class']
        wrap(tableTag, soup.new_tag("table"))
    return soup
142+
143+
def wrap(to_wrap, wrap_in):
    """Put *wrap_in* in the place of *to_wrap* and nest the result inside it.

    *to_wrap* is replaced by *wrap_in* via ``replace_with``; whatever that
    call returns is then appended to *wrap_in*.
    """
    detached = to_wrap.replace_with(wrap_in)
    wrap_in.append(detached)
146+
147+
def getNumCols(soup):
    """Return the number of ``th`` elements found anywhere inside *soup*."""
    headerCells = soup.find_all('th')
    return len(headerCells)

0 commit comments

Comments
 (0)