1
1
import sys
2
2
import os
3
3
4
+
4
5
def processFile (inFile , outFile ):
5
- mdFile = open (inFile , 'r' )
6
+ mdFile = open (inFile , "r" )
6
7
toc = []
7
- levels = [0 ,0 , 0 , 0 , 0 ]
8
- newFile = open (outFile , 'w' )
8
+ levels = [0 , 0 , 0 , 0 , 0 ]
9
+ newFile = open (outFile , "w" )
9
10
tempFile = []
10
11
tocLoc = 0
11
12
partOfToc = False
12
-
13
+
13
14
for line in mdFile :
14
- if partOfToc and line != ' \n ' :
15
+ if partOfToc and line != " \n " :
15
16
continue
16
17
else :
17
18
partOfToc = False
18
- if ' Table of Contents' in line :
19
+ if " Table of Contents" in line :
19
20
tocLoc = len (tempFile ) + 1
20
21
partOfToc = True
21
22
line += "\n "
22
- elif line [0 ] == '#' :
23
+ elif line [0 ] == "#" :
23
24
secId = buildToc (line , toc , levels )
24
- line = addSectionTag (cleanLine (line ), secId ) + ' \n '
25
+ line = addSectionTag (cleanLine (line ), secId ) + " \n "
25
26
tempFile .append (line )
26
27
27
-
28
28
for line in toc :
29
29
tempFile .insert (tocLoc , line )
30
30
tocLoc += 1
@@ -36,58 +36,75 @@ def processFile(inFile, outFile):
36
36
mdFile .close ()
37
37
newFile .close ()
38
38
39
+
39
40
def addSectionTag (line , secId ):
40
- startIndex = line .find (' ' )
41
- line = line [:startIndex + 1 ] + ' <a id=\' ' + secId + ' \' />' + line [startIndex + 1 :]
41
+ startIndex = line .find (" " )
42
+ line = line [: startIndex + 1 ] + " <a id='" + secId + "' />" + line [startIndex + 1 :]
42
43
return line
43
44
45
+
44
46
def buildToc (line , toc , levels ):
45
47
line = cleanLine (line )
46
- secId = 's'
47
- if line [:4 ] == '####' :
48
-
49
- #raise UserWarning('Header levels greater than 3 not supported')
48
+ secId = "s"
49
+ if line [:4 ] == "####" :
50
+ # raise UserWarning('Header levels greater than 3 not supported')
50
51
levels [4 ] += 1
51
- secId += str (levels [1 ]) + '-' + str (levels [2 ]) + '-' + str (levels [3 ]) + '-' + str (levels [4 ])
52
- toc .append (' - [' + line [5 :] + '](#' + secId + ')\n ' )
53
- elif line [:3 ] == '###' :
52
+ secId += (
53
+ str (levels [1 ])
54
+ + "-"
55
+ + str (levels [2 ])
56
+ + "-"
57
+ + str (levels [3 ])
58
+ + "-"
59
+ + str (levels [4 ])
60
+ )
61
+ toc .append (" - [" + line [5 :] + "](#" + secId + ")\n " )
62
+ elif line [:3 ] == "###" :
54
63
levels [3 ] += 1
55
- secId += str (levels [1 ]) + '-' + str (levels [2 ]) + '-' + str (levels [3 ])
56
- toc .append (' - [' + line [4 :] + ' ](#' + secId + ' )\n ' )
57
- elif line [:2 ] == '##' :
64
+ secId += str (levels [1 ]) + "-" + str (levels [2 ]) + "-" + str (levels [3 ])
65
+ toc .append (" - [" + line [4 :] + " ](#" + secId + " )\n " )
66
+ elif line [:2 ] == "##" :
58
67
levels [2 ] += 1
59
68
levels [3 ] = 0
60
- secId += str (levels [1 ]) + '-' + str (levels [2 ])
61
- toc .append (' - [' + line [3 :] + ' ](#' + secId + ' )\n ' )
62
- elif line [:1 ] == '#' :
69
+ secId += str (levels [1 ]) + "-" + str (levels [2 ])
70
+ toc .append (" - [" + line [3 :] + " ](#" + secId + " )\n " )
71
+ elif line [:1 ] == "#" :
63
72
levels [1 ] += 1
64
73
levels [3 ] = levels [2 ] = 0
65
74
secId += str (levels [1 ])
66
- toc .append (' - [' + line [2 :] + ' ](#' + secId + ' )\n ' )
75
+ toc .append (" - [" + line [2 :] + " ](#" + secId + " )\n " )
67
76
return secId
68
77
78
+
69
79
def cleanLine (text ):
70
80
text = stripNewline (text )
71
81
text = removeAnchors (text )
72
82
return text
73
83
84
+
74
85
def stripNewline (text ):
75
- return text .replace ('\n ' , '' )
86
+ return text .replace ("\n " , "" )
87
+
76
88
77
89
def removeAnchors (text ):
78
- while ( '<' in text and '>' in text ) :
79
- leftTag = text .index ('<' )
80
- rightTag = text .index ('>' )
81
- text = text [0 :leftTag ] + text [rightTag + 1 :]
90
+ while "<" in text and ">" in text :
91
+ leftTag = text .index ("<" )
92
+ rightTag = text .index (">" )
93
+ text = text [0 :leftTag ] + text [rightTag + 1 :]
82
94
return text
83
95
84
- def clean (docx_file ,inFile ,outFile ,run_pandoc = True ):
96
+
97
+ def clean (docx_file , inFile , outFile , run_pandoc = True ):
85
98
if run_pandoc :
86
- os .system ("pandoc -t gfm --wrap=none --extract-media . -o file.md {0} --mathjax" .format (docx_file ))
87
- num_str = [str (i ) for i in range (1 ,11 )]
88
- lines = open (inFile ,'r' ).readlines ()
99
+ os .system (
100
+ "pandoc -t gfm --wrap=none --extract-media . -o file.md {0} --mathjax" .format (
101
+ docx_file
102
+ )
103
+ )
104
+ num_str = [str (i ) for i in range (1 , 11 )]
105
+ lines = open (inFile , "r" ).readlines ()
89
106
90
- #notoc_lines = []
107
+ # notoc_lines = []
91
108
i = 0
92
109
# while i < len(lines):
93
110
# line = lines[i]
@@ -104,31 +121,40 @@ def clean(docx_file,inFile,outFile,run_pandoc=True):
104
121
# notoc_lines = []
105
122
i = 0
106
123
while i < len (lines ):
107
- #if lines[i].strip().startswith("----"):
124
+ # if lines[i].strip().startswith("----"):
108
125
# lines[i-1] = "## " + lines[i-1]
109
126
# lines[i] = "\n"
110
127
if "<img" in lines [i ]:
111
- lines [i ] = lines [i ].replace ("#" ,"" )
128
+ lines [i ] = lines [i ].replace ("#" , "" )
112
129
if "<p>" in lines [i ]:
113
- lines [i ] = lines [i ].replace ("<p>" ,"" ).replace ("</p>" ,"<br>" ).replace ("#" ,"~" )
130
+ lines [i ] = (
131
+ lines [i ].replace ("<p>" , "" ).replace ("</p>" , "<br>" ).replace ("#" , "~" )
132
+ )
114
133
if "blockquote" in lines [i ]:
115
- lines [i ] = lines [i ].replace ("<blockquote>" ,"" ).replace ("</blockquote>" ,"" )
134
+ lines [i ] = lines [i ].replace ("<blockquote>" , "" ).replace ("</blockquote>" , "" )
116
135
if lines [i ].strip ().startswith ("$" ) and not "bmatrix" in lines [i ].lower ():
117
136
label = lines [i ].split ()[- 1 ]
118
- eq_str = lines [i ].replace ("$$" ,"$" ).split ('$' )[1 ]
119
- eq_str = r"{0}" .format (eq_str ).replace ("\\ \\ " ,"\\ " ).replace (" \\ " ," " ).replace ("\\ _" ,"_" )
120
- math_str_pre = "<img src=\" https://latex.codecogs.com/svg.latex?\Large&space;{0}" .format (eq_str )
121
- math_str_post = "\" title=\" \Large {0}\" /> {1} <br>" .format (eq_str ,label )
137
+ eq_str = lines [i ].replace ("$$" , "$" ).split ("$" )[1 ]
138
+ eq_str = (
139
+ r"{0}" .format (eq_str )
140
+ .replace ("\\ \\ " , "\\ " )
141
+ .replace (" \\ " , " " )
142
+ .replace ("\\ _" , "_" )
143
+ )
144
+ math_str_pre = '<img src="https://latex.codecogs.com/svg.latex?\Large&space;{0}' .format (
145
+ eq_str
146
+ )
147
+ math_str_post = '" title="\Large {0}" /> {1} <br>' .format (eq_str , label )
122
148
lines [i ] = math_str_pre + " " + math_str_post
123
- if lines [i ].strip ().startswith ("<table>" ): # and "pcf" in lines[i].lower():
124
- lines [i ] = " <div style=\ " text-align: left\" >" + lines [i ] + "</div>"
149
+ if lines [i ].strip ().startswith ("<table>" ): # and "pcf" in lines[i].lower():
150
+ lines [i ] = ' <div style="text-align: left">' + lines [i ] + "</div>"
125
151
if "comment" in lines [i ].lower ():
126
- lines [i ] = lines [i ].replace ("~" ,"#" )
152
+ lines [i ] = lines [i ].replace ("~" , "#" )
127
153
elif lines [i ].strip ().startswith ("[" ) and "]" in lines [i ]:
128
- raw = lines [i ].strip ().split (']' )
129
- rraw = " " .join (raw [0 ].split ()[:- 1 ])+ "]"
154
+ raw = lines [i ].strip ().split ("]" )
155
+ rraw = " " .join (raw [0 ].split ()[:- 1 ]) + "]"
130
156
new_line = rraw + "" .join (raw [1 :]) + "\n "
131
- print (lines [i ],new_line )
157
+ print (lines [i ], new_line )
132
158
lines [i ] = new_line
133
159
# elif "bmatrix" in lines[i].lower():
134
160
# eq_str = lines[i]
@@ -146,20 +172,18 @@ def clean(docx_file,inFile,outFile,run_pandoc=True):
146
172
147
173
i += 1
148
174
149
- #lines[0] = "# Table of Contents\n"
150
- with open (outFile ,'w' ) as f :
151
-
175
+ # lines[0] = "# Table of Contents\n"
176
+ with open (outFile , "w" ) as f :
152
177
for line in lines :
153
- #if line.lower().strip().startswith("[introduction"):
178
+ # if line.lower().strip().startswith("[introduction"):
154
179
# f.write("\n# Table of Contents\n\n")
155
180
f .write (line )
156
181
157
182
158
183
if __name__ == "__main__" :
159
-
160
- clean ("new_function_documentation.docx" ,"file.md" ,"fortran_library_documentation.md" ,True )
161
-
162
-
163
-
164
-
165
-
184
+ clean (
185
+ "new_function_documentation.docx" ,
186
+ "file.md" ,
187
+ "fortran_library_documentation.md" ,
188
+ True ,
189
+ )
0 commit comments