Skip to content

Commit e702d8d

Browse files
Adds handling of "hidden" tag for use with DITA output.
Signed-off-by: Kevin Putnam <[email protected]>
1 parent 3be41eb commit e702d8d

File tree

2 files changed

+56
-32
lines changed

2 files changed

+56
-32
lines changed

sphinx_md/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ def setup(app):
154154
app.add_config_value('sphinx_md_useGitHubURL',False,'')
155155
app.add_config_value('sphinx_md_githubFileURL','','')
156156
app.add_config_value('sphinx_md_githubDirURL','','')
157+
app.add_config_value('sphinx_md_processRaw',False,'')
157158
app.add_config_value('sphinx_md_tableIDs',{},'')
158159
app.connect('doctree-read',html2Docutils)
159160
app.connect('doctree-resolved',fixLocalMDAnchors)

sphinx_md/convert_html.py

Lines changed: 55 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -3,27 +3,40 @@
33
import os.path
44

55
known_start_tags = ['p','img','table']
6+
hidden_tag = 'hidden'
67

78
def html2Docutils(app, doctree):
89
#find all raw nodes
10+
if not app.env.config.sphinx_md_processRaw:
11+
return
912
filepath = doctree['source']
13+
htmlCounter = 0
1014
for node in doctree.traverse(nodes.raw):
1115
soup = BeautifulSoup(node.astext(),features="html.parser")
12-
if soup.find().name in known_start_tags:
13-
#send it to converter
14-
div = nodes.container()
15-
convertHTML(soup, div, app, filepath)
16-
parent = node.parent
17-
parent.replace(node,div)
18-
#replace raw node with output of converter
19-
#child = nodes.Text("poof")
20-
#node[0]=child
16+
if soup.find():
17+
if soup.find().name in known_start_tags:
18+
#send it to converter
19+
div = nodes.container()
20+
div['id']='html-content-' + str(htmlCounter)
21+
htmlCounter += 1
22+
convertHTML(soup, div, app, filepath)
23+
parent = node.parent
24+
parent.replace(node,div.children)
25+
#replace raw node with output of converter
26+
#child = nodes.Text("poof")
27+
#node[0]=child
28+
elif soup.find().name == hidden_tag:
29+
hidden_comment = nodes.comment()
30+
comment_text = nodes.Text("hidden")
31+
hidden_comment.append(comment_text)
32+
parent = node.parent
33+
parent.replace(node,hidden_comment)
2134

2235
def convertHTML(soup, parent, app, filepath):
2336
if hasattr(soup,"children"):
2437
for child in soup.children:
2538
node = None
26-
if child.name is not None:
39+
if hasattr(child,"name") and child.name is not None:
2740
if child.name == "table":
2841
if filepath not in app.env.config.sphinx_md_tableIDs:
2942
app.env.config.sphinx_md_tableIDs['filepath']=0
@@ -51,19 +64,24 @@ def convertHTML(soup, parent, app, filepath):
5164
if "alt" in child.attrs:
5265
node["alt"]=child.attrs['alt']
5366
if "src" in child.attrs:
54-
basepath = app.env.srcdir + "/"
55-
docfilename = os.path.splitext(os.path.relpath(filepath,basepath))[0]
56-
relpath = os.path.dirname(os.path.relpath(filepath,basepath))
57-
imgPath = os.path.join(relpath,child.attrs['src'])
58-
node["uri"]= imgPath
59-
print("Checking for file.")
60-
if os.path.isfile(imgPath):
61-
if imgPath not in app.env.images:
62-
imageFileName = os.path.basename(imgPath)
63-
imageTuple = ({docfilename},imageFileName)
64-
app.env.images[imgPath]=imageTuple
67+
if "https" in child.attrs['src']:
68+
node["uri"]=child.attrs['src']
69+
else:
70+
basepath = app.env.srcdir + "/"
71+
docfilename = os.path.splitext(os.path.relpath(filepath,basepath))[0]
72+
relpath = os.path.dirname(os.path.relpath(filepath,basepath))
73+
imgPath = os.path.join(relpath,child.attrs['src'])
74+
node["uri"]= imgPath
75+
if os.path.isfile(imgPath):
76+
if imgPath not in app.env.images:
77+
imageFileName = os.path.basename(imgPath)
78+
imageTuple = ({docfilename},imageFileName)
79+
app.env.images[imgPath]=imageTuple
6580
if "width" in child.attrs:
66-
node["width"]=child.attrs['width']
81+
suffix = ''
82+
if child.attrs['width'].isnumeric():
83+
suffix = 'px'
84+
node["width"]=child.attrs['width'] + suffix
6785
if "height" in child.attrs:
6886
node["height"]=child.attrs['height']
6987
node["candidates"]="{'*': '" + imgPath + "'}"
@@ -78,22 +96,27 @@ def convertHTML(soup, parent, app, filepath):
7896
node = nodes.row()
7997
parent += node
8098
elif child.name == "th" or child.name == "td":
81-
node = nodes.entry()
99+
eNode = nodes.entry()
100+
node = nodes.paragraph()
101+
eNode += node
102+
parent += eNode
103+
elif child.name == "sup":
104+
node = nodes.superscript()
105+
parent += node
106+
elif child.name == "a":
107+
node = nodes.reference()
108+
node["refuri"] = child.attrs['href']
109+
parent += node
110+
elif child.name == "code":
111+
node = nodes.literal()
82112
parent += node
83113
else:
84-
if isinstance(parent, nodes.entry) or isinstance(parent, nodes.paragraph) or isinstance(parent, nodes.image):
114+
if isinstance(parent,nodes.Node):
115+
#if isinstance(parent, nodes.entry) or isinstance(parent, nodes.paragraph) or isinstance(parent, nodes.image) or isinstance(parent, nodes.superscript) or isinstance(parent, nodes.reference) or isinstance(parent, nodes.literal):
85116
node = nodes.Text(child)
86117
parent += node
87118
if node:
88119
convertHTML(child,node,app,filepath)
89-
# soup = removeHTMLAttributes(soup,"table")
90-
# soup = removeHTMLAttributes(soup,"p")
91-
# soup = replaceTag(soup,"tr","row")
92-
# soup = replaceTag(soup,"th","entry")
93-
# soup = replaceTag(soup,"td","entry")
94-
# soup = addTGroup(soup)
95-
# soup = fixImages(soup)
96-
# ditaXML = str(soup)
97120

98121
def removeHTMLAttributes(soup,tagName):
99122
tags = soup.find_all(tagName)

0 commit comments

Comments
 (0)