-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathprepare.py
More file actions
122 lines (100 loc) · 4.58 KB
/
prepare.py
File metadata and controls
122 lines (100 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Prepares a simple Markdown doc for publishing on the CN Blog site."""
import argparse
import html
import re
import sys
import tempfile
from pathlib import Path

import markdown
import md_toc
from markdown.preprocessors import Preprocessor
# Build the command-line interface: one positional file path plus two
# optional switches.
my_parser = argparse.ArgumentParser(
    description='Prepare a pure Markdown article for blog publishing.'
)
my_parser.add_argument(
    'File',
    metavar='file',
    type=str,
    help='the path to the file to be prepared',
)
my_parser.add_argument(
    '-toc',
    action='store_true',
    help='whether to create a clickable TOC at the top',
)
my_parser.add_argument(
    '-p',
    action='store_true',
    help='if the Markdown is copied from a CN Learning Platform Resource',
)

# Parse the command line once and expose the results as module-level names.
args = my_parser.parse_args()

input_path = Path(args.File)
add_toc = args.toc
is_platform = args.p

if not input_path.is_file():
    # Report the problem on stderr and exit with a non-zero status so that
    # shells and calling scripts can tell the run failed.
    print('The file specified does not exist', file=sys.stderr)
    sys.exit(1)
# Matches an inline Markdown link `[text](url)` whose text consists only of
# word characters and whitespace and whose URL starts with http:// or https://.
# Group 1 captures the link text, group 2 the complete URL and group 3 the
# optional "www." prefix. The leading `(?<!!)` lookbehind rejects a `[` that is
# directly preceded by `!`.
new_tab_link_pattern = re.compile(r'(?<!!)\[([\w\s]+)\]\((https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b[-a-zA-Z0-9()@:%_\+.~#?&\/\/=]*)\)')
"""
The regex matches all Markdown links of the following form:
[Example](http://www.example.com)
which are then replaced with new-tab HTML links, e.g.:
<a href="http://www.example.com" target="_blank">Example</a>
NOTE: it does **NOT** match Markdown image links, e.g.
![Example](http://www.example.com/example.png)
which is intentional and expected behavior.
"""
def update_links(matchobj: "re.Match[str]") -> str:
    """Create an HTML link that opens in a new tab from a regex match.

    Args:
        matchobj: a match whose group 1 is the link text and whose group 2
            is the link URL.

    Returns:
        An ``<a>`` element with ``target="_blank"`` for that text and URL.
    """
    # Escape both parts for their HTML context: the URL may contain `&`
    # (query strings), which has to be written as `&amp;` inside an attribute.
    text = html.escape(matchobj.group(1), quote=False)
    url = html.escape(matchobj.group(2), quote=True)
    return f'<a href="{url}" target="_blank">{text}</a>'
if add_toc:
    class RenderOnlyHeadings(Preprocessor):
        """Pass only heading lines on to the Markdown parser.

        Every line that is not a hash-style heading is dropped, so converting
        a document yields nothing but its rendered headings. Lines inside
        fenced code blocks are dropped as well, because a ``# comment`` in a
        code sample is not a heading.
        """

        # a line that starts with one or more `#` followed by a word
        _HEADING_RE = re.compile(r"^#+\s*\w+")
        # an opening/closing code fence: at least three backticks or tildes
        _FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})")

        def run(self, lines):
            """Return the heading lines found in *lines*, in order."""
            headings = []
            open_fence = None  # marker of the fence we are inside, if any
            for line in lines:
                fence = self._FENCE_RE.match(line)
                if open_fence is not None:
                    # Inside a code block: only a bare fence of the same
                    # character, at least as long as the opener, closes it.
                    if (
                        fence
                        and fence.group(1)[0] == open_fence[0]
                        and len(fence.group(1)) >= len(open_fence)
                        and not line[fence.end():].strip()
                    ):
                        open_fence = None
                    continue
                if fence:
                    open_fence = fence.group(1)
                    continue
                if self._HEADING_RE.search(line):
                    # only lines that are headings are passed through
                    headings.append(line)
            return headings

    # initialize the Markdown class
    md = markdown.Markdown(extensions=['toc'])
    # A preprocessor is constructed with the Markdown instance itself (not its
    # block parser); 710 is the priority it is registered with.
    md.preprocessors.register(RenderOnlyHeadings(md), 'skip', 710)
# One rendered heading line, e.g. <h3 id="installing-git">Installing Git</h3>.
_RENDERED_HEADING = re.compile(r'<h([1-6])\b[^>]*>(.*)</h\1>')


def _adapt_platform_markup(text):
    """Convert Learning-Platform specific markup in *text* for the blog."""
    # take out bootstrap-specific formatting of information boxes
    # and replace with making them blockquotes, which render nicely on our blog
    text = re.sub(r"<div\sclass='alert+\s+\w+-\w+'\srole='alert'>\n\s+(<strong>\w+:<\/strong>\s)?", ">", text)
    text = re.sub(r"<\/div>\n", "", text)
    # upscale the levels of the headings by one (title will remain only
    # 1st-level heading). A single pass removes exactly one `#` from every run
    # of three or more; two consecutive passes would shift deeper headings twice.
    return re.sub(r"#(#{2,})\s+", r"\1 ", text)


def _build_toc(text):
    """Return the Markdown TOC that md_toc builds for *text*."""
    # md_toc reads from a file name, so give it a temporary copy of the text
    # as it will be written, not the unmodified file that is still on disk.
    tmp = tempfile.NamedTemporaryFile('w', suffix='.md', delete=False)
    try:
        with tmp:
            tmp.write(text)
        # keep_header_levels=3 limits the TOC to headings down to h3
        return md_toc.build_toc(tmp.name, keep_header_levels=3)
    finally:
        Path(tmp.name).unlink()


def _link_headings(text, rendered_headings):
    """Swap Markdown h2/h3 headings in *text* for their rendered HTML lines.

    *rendered_headings* holds lines such as
    ``<h3 id="installing-git-on-linux">Installing Git on Linux</h3>``; each one
    replaces the first Markdown heading of the same level and text.
    """
    for rendered in rendered_headings:
        parsed = _RENDERED_HEADING.fullmatch(rendered.strip())
        if parsed is None:
            continue
        level, inner = int(parsed.group(1)), parsed.group(2)
        # Only h2/h3 are linked. Skip headings with nested markup or no text:
        # they cannot be matched back to their Markdown source line reliably.
        if level not in (2, 3) or not inner.strip() or '<' in inner:
            continue
        heading_text = html.unescape(inner)
        source_line = re.compile(
            rf'^#{{{level}}}[ \t]*{re.escape(heading_text)}[ \t]*#*[ \t]*$',
            re.MULTILINE,
        )
        # A callable replacement keeps backslashes in the HTML literal.
        text = source_line.sub(lambda _m: rendered, text, count=1)
    return text


with open(input_path, 'r+') as fin:
    content = fin.read()

    # Replaces all Markdown-style links with HTML new-tab links
    content = new_tab_link_pattern.sub(update_links, content)
    print(f'{input_path.name}: Replaced Links')

    if is_platform:
        content = _adapt_platform_markup(content)
        print(f'{input_path.name}: Substituted Learning-Platform specific syntax')

    if add_toc:  # only add the TOC if specified for the file
        # first, get the HTML-converted headings with page-stub ids
        converted_headings = md.convert(content)
        if converted_headings:
            # now build the clickable Markdown TOC
            toc = _build_toc(content)
            if toc:
                # write to the beginning of the file
                content = f"{toc}\n{content}"
                print(f"{input_path.name}: Added TOC")
            # give the headings the ids that the TOC entries point to
            content = _link_headings(content, converted_headings.split('\n'))
            print(f'{input_path.name}: TOC is now clickable')

    # reset the file and write the changed content
    fin.seek(0)
    fin.truncate(0)
    fin.write(content)
if __name__ == '__main__':
    # The command line has already been parsed into `args` earlier in this
    # module, so all that is left to do here is report completion.
    print("----- finished processing file ----")