1
1
#!/usr/bin/env python
2
2
3
+ import json
4
+ import subprocess
3
5
from collections import namedtuple
4
6
from pathlib import Path
5
- import json
7
+
6
8
from lxml import html
7
- import subprocess
8
9
9
- DEST = Path ('/home/harry/ workspace/www.obeythetestinggoat.com/content/book' )
10
+ DEST = Path ("~/ workspace/www.obeythetestinggoat.com/content/book" ). expanduser ( )
10
11
11
12
CHAPTERS = [
12
- c .replace (' .asciidoc' , ' .html' )
13
- for c in json .loads (open ( ' atlas.json' ). read ())[' files' ]
13
+ c .replace (" .asciidoc" , " .html" )
14
+ for c in json .loads (Path ( " atlas.json" ). read_text ())[" files" ]
14
15
]
15
- for tweak_chap in [' praise.html' , ' part1.html' , ' part2.html' , ' part3.html' ]:
16
- CHAPTERS [CHAPTERS .index (tweak_chap )] = tweak_chap .replace ('.' , ' .forbook.' )
17
- CHAPTERS .remove (' cover.html' )
18
- CHAPTERS .remove (' titlepage.html' )
19
- CHAPTERS .remove (' copyright.html' )
20
- CHAPTERS .remove (' toc.html' )
21
- CHAPTERS .remove (' ix.html' )
22
- CHAPTERS .remove (' author_bio.html' )
23
- CHAPTERS .remove (' colo.html' )
16
+ for tweak_chap in [" praise.html" , " part1.html" , " part2.html" , " part3.html" ]:
17
+ CHAPTERS [CHAPTERS .index (tweak_chap )] = tweak_chap .replace ("." , " .forbook." )
18
+ CHAPTERS .remove (" cover.html" )
19
+ CHAPTERS .remove (" titlepage.html" )
20
+ CHAPTERS .remove (" copyright.html" )
21
+ CHAPTERS .remove (" toc.html" )
22
+ CHAPTERS .remove (" ix.html" )
23
+ CHAPTERS .remove (" author_bio.html" )
24
+ CHAPTERS .remove (" colo.html" )
24
25
25
- ChapterInfo = namedtuple (' ChapterInfo' , ' href_id chapter_title subheaders xrefs' )
26
+ ChapterInfo = namedtuple (" ChapterInfo" , " href_id chapter_title subheaders xrefs" )
26
27
27
28
28
29
def make_chapters ():
29
30
for chapter in CHAPTERS :
30
- subprocess .check_call ([' make' , chapter ], stdout = subprocess .PIPE )
31
+ subprocess .check_call ([" make" , chapter ], stdout = subprocess .PIPE )
31
32
32
33
33
34
def parse_chapters ():
34
35
for chapter in CHAPTERS :
35
- raw_html = open (chapter ).read ()
36
+ raw_html = Path (chapter ).read_text ()
36
37
yield chapter , html .fromstring (raw_html )
37
38
38
39
39
40
def get_anchor_targets (parsed_html ):
40
- ignores = {'header' , 'content' , 'footnotes' , 'footer' , 'footer-text' }
41
- all_ids = [
42
- a .get ('id' ) for a in parsed_html .cssselect ('*[id]' )
43
- ]
44
- return [i for i in all_ids if not i .startswith ('_' ) and i not in ignores ]
41
+ ignores = {"header" , "content" , "footnotes" , "footer" , "footer-text" }
42
+ all_ids = [a .get ("id" ) for a in parsed_html .cssselect ("*[id]" )]
43
+ return [i for i in all_ids if not i .startswith ("_" ) and i not in ignores ]
44
+
45
45
46
46
def get_chapter_info ():
47
47
chapter_info = {}
48
- appendix_numbers = list (' ABCDEFGHIJKL' )
48
+ appendix_numbers = list (" ABCDEFGHIJKL" )
49
49
chapter_numbers = list (range (1 , 100 ))
50
50
part_numbers = list (range (1 , 10 ))
51
51
52
52
for chapter , parsed_html in parse_chapters ():
53
- print (' getting info from' , chapter )
53
+ print (" getting info from" , chapter )
54
54
55
- if not parsed_html .cssselect ('h2' ):
56
- header = parsed_html .cssselect ('h1' )[0 ]
55
+ if not parsed_html .cssselect ("h2" ):
56
+ header = parsed_html .cssselect ("h1" )[0 ]
57
57
else :
58
- header = parsed_html .cssselect ('h2' )[0 ]
59
- href_id = header .get ('id' )
58
+ header = parsed_html .cssselect ("h2" )[0 ]
59
+ href_id = header .get ("id" )
60
60
if href_id is None :
61
- href_id = parsed_html .cssselect (' body' )[0 ].get ('id' )
62
- subheaders = [h .get ('id' ) for h in parsed_html .cssselect ('h3' )]
61
+ href_id = parsed_html .cssselect (" body" )[0 ].get ("id" )
62
+ subheaders = [h .get ("id" ) for h in parsed_html .cssselect ("h3" )]
63
63
64
64
chapter_title = header .text_content ()
65
- chapter_title = chapter_title .replace (' Appendix A: ' , '' )
65
+ chapter_title = chapter_title .replace (" Appendix A: " , "" )
66
66
67
- if chapter .startswith (' chapter_' ):
67
+ if chapter .startswith (" chapter_" ):
68
68
chapter_no = chapter_numbers .pop (0 )
69
- chapter_title = f' Chapter { chapter_no } : { chapter_title } '
69
+ chapter_title = f" Chapter { chapter_no } : { chapter_title } "
70
70
71
- if chapter .startswith (' appendix_' ):
71
+ if chapter .startswith (" appendix_" ):
72
72
appendix_no = appendix_numbers .pop (0 )
73
- chapter_title = f' Appendix { appendix_no } : { chapter_title } '
73
+ chapter_title = f" Appendix { appendix_no } : { chapter_title } "
74
74
75
- if chapter .startswith (' part' ):
75
+ if chapter .startswith (" part" ):
76
76
part_no = part_numbers .pop (0 )
77
- chapter_title = f'Part { part_no } : { chapter_title } '
78
-
79
- if chapter .startswith ('epilogue' ):
80
- chapter_title = f'Epilogue: { chapter_title } '
77
+ chapter_title = f"Part { part_no } : { chapter_title } "
81
78
79
+ if chapter .startswith ("epilogue" ):
80
+ chapter_title = f"Epilogue: { chapter_title } "
82
81
83
82
xrefs = get_anchor_targets (parsed_html )
84
83
chapter_info [chapter ] = ChapterInfo (href_id , chapter_title , subheaders , xrefs )
@@ -88,74 +87,78 @@ def get_chapter_info():
88
87
89
88
def fix_xrefs (contents , chapter , chapter_info ):
90
89
parsed = html .fromstring (contents )
91
- links = parsed .cssselect (' a[href^=\#]' )
90
+ links = parsed .cssselect (r" a[href^=\#]" )
92
91
for link in links :
93
92
for other_chap in CHAPTERS :
94
93
if other_chap == chapter :
95
94
continue
96
95
chapter_id = chapter_info [other_chap ].href_id
97
- href = link .get (' href' )
98
- targets = ['#' + x for x in chapter_info [other_chap ].xrefs ]
99
- if href == '#' + chapter_id :
100
- link .set (' href' , f' /book/{ other_chap } ' )
96
+ href = link .get (" href" )
97
+ targets = ["#" + x for x in chapter_info [other_chap ].xrefs ]
98
+ if href == "#" + chapter_id :
99
+ link .set (" href" , f" /book/{ other_chap } " )
101
100
elif href in targets :
102
- link .set (' href' , f' /book/{ other_chap } { href } ' )
101
+ link .set (" href" , f" /book/{ other_chap } { href } " )
103
102
104
103
return html .tostring (parsed )
105
104
106
105
107
106
def fix_title (contents , chapter , chapter_info ):
108
107
parsed = html .fromstring (contents )
109
- titles = parsed .cssselect ('h2' )
110
- if titles and titles [0 ].text .startswith (' Appendix A' ):
108
+ titles = parsed .cssselect ("h2" )
109
+ if titles and titles [0 ].text .startswith (" Appendix A" ):
111
110
title = titles [0 ]
112
111
title .text = chapter_info [chapter ].chapter_title
113
112
return html .tostring (parsed )
114
113
114
+
115
115
def copy_chapters_across_with_fixes (chapter_info , fixed_toc ):
116
- comments_html = open ( ' disqus_comments.html' ). read ()
117
- buy_book_div = html .fromstring (open ( ' buy_the_book_banner.html' ). read ())
118
- analytics_div = html .fromstring (open ( ' analytics.html' ). read ())
119
- load_toc_script = open ( ' load_toc.js' ). read ()
116
+ comments_html = Path ( " disqus_comments.html" ). read_text ()
117
+ buy_book_div = html .fromstring (Path ( " buy_the_book_banner.html" ). read_text ())
118
+ analytics_div = html .fromstring (Path ( " analytics.html" ). read_text ())
119
+ load_toc_script = Path ( " load_toc.js" ). read_text ()
120
120
121
121
for chapter in CHAPTERS :
122
- old_contents = open (chapter ).read ()
122
+ old_contents = Path (chapter ).read_text ()
123
123
new_contents = fix_xrefs (old_contents , chapter , chapter_info )
124
124
new_contents = fix_title (new_contents , chapter , chapter_info )
125
125
parsed = html .fromstring (new_contents )
126
- body = parsed .cssselect ('body' )[0 ]
127
- if parsed .cssselect ('#header' ):
128
- head = parsed .cssselect ('head' )[0 ]
129
- head .append (html .fragment_fromstring ('<script>' + load_toc_script + '</script>' ))
130
- body .set ('class' , 'article toc2 toc-left' )
126
+ body = parsed .cssselect ("body" )[0 ]
127
+ if parsed .cssselect ("#header" ):
128
+ head = parsed .cssselect ("head" )[0 ]
129
+ head .append (
130
+ html .fragment_fromstring ("<script>" + load_toc_script + "</script>" )
131
+ )
132
+ body .set ("class" , "article toc2 toc-left" )
131
133
body .insert (0 , buy_book_div )
132
- body .append (html .fromstring (
133
- comments_html .replace ('CHAPTER_NAME' , chapter .split ('.' )[0 ])
134
- ))
134
+ body .append (
135
+ html .fromstring (
136
+ comments_html .replace ("CHAPTER_NAME" , chapter .split ("." )[0 ])
137
+ )
138
+ )
135
139
body .append (analytics_div )
136
140
fixed_contents = html .tostring (parsed )
137
141
138
- with open (DEST / chapter , 'w' ) as f :
139
- f .write (fixed_contents .decode (' utf8' ))
140
- with open (DEST / ' toc.html' , 'w' ) as f :
141
- f .write (html .tostring (fixed_toc ).decode (' utf8' ))
142
+ with open (DEST / chapter , "w" ) as f :
143
+ f .write (fixed_contents .decode (" utf8" ))
144
+ with open (DEST / " toc.html" , "w" ) as f :
145
+ f .write (html .tostring (fixed_toc ).decode (" utf8" ))
142
146
143
147
144
148
def extract_toc_from_book ():
145
- subprocess .check_call (['make' , 'book.html' ], stdout = subprocess .PIPE )
146
- parsed = html .fromstring (open ('book.html' ).read ())
147
- return parsed .cssselect ('#toc' )[0 ]
148
-
149
+ subprocess .check_call (["make" , "book.html" ], stdout = subprocess .PIPE )
150
+ parsed = html .fromstring (Path ("book.html" ).read_text ())
151
+ return parsed .cssselect ("#toc" )[0 ]
149
152
150
153
151
154
def fix_toc (toc , chapter_info ):
152
155
href_mappings = {}
153
156
for chapter in CHAPTERS :
154
157
chap = chapter_info [chapter ]
155
158
if chap .href_id :
156
- href_mappings ['#' + chap .href_id ] = f' /book/{ chapter } '
159
+ href_mappings ["#" + chap .href_id ] = f" /book/{ chapter } "
157
160
for subheader in chap .subheaders :
158
- href_mappings ['#' + subheader ] = f' /book/{ chapter } #{ subheader } '
161
+ href_mappings ["#" + subheader ] = f" /book/{ chapter } #{ subheader } "
159
162
160
163
def fix_link (href ):
161
164
if href in href_mappings :
@@ -164,18 +167,21 @@ def fix_link(href):
164
167
return href
165
168
166
169
toc .rewrite_links (fix_link )
167
- toc .set (' class' , ' toc2' )
170
+ toc .set (" class" , " toc2" )
168
171
return toc
169
172
170
173
171
174
def print_toc_md (chapter_info ):
172
175
for chapter in CHAPTERS :
173
176
title = chapter_info [chapter ].chapter_title
174
- print (f' * [{ title } ](/book/{ chapter } )' )
177
+ print (f" * [{ title } ](/book/{ chapter } )" )
175
178
176
179
177
180
def rsync_images ():
178
- subprocess .run (['rsync' , '-a' , '-v' , 'images/' , DEST / 'images/' ])
181
+ subprocess .run (
182
+ ["rsync" , "-a" , "-v" , "images/" , DEST / "images/" ],
183
+ check = True ,
184
+ )
179
185
180
186
181
187
def main ():
@@ -188,5 +194,5 @@ def main():
188
194
print_toc_md (chapter_info )
189
195
190
196
191
- if __name__ == ' __main__' :
197
+ if __name__ == " __main__" :
192
198
main ()
0 commit comments