11import hashlib
2+ import json
23import pathlib
34import subprocess
45import sqlite_utils
56import tempfile
7+ import zlib
68
# Directory that contains this script; handed to generate_screenshots() below,
# which looks for tils.db inside it.
root = pathlib.Path(__file__).parent.resolve()
# System temporary directory, used as scratch space for the rendered HTML page.
TMP_PATH = pathlib.Path(tempfile.gettempdir())
9- SHOT_HASH_PATHS = [
10- (root / "templates" / "row.html" ),
11- (root / "templates" / "til_base.html" ),
12- ]
1311
def _decompress(compressed):
    """Return the UTF-8 text stored in the zlib-compressed bytes *compressed*."""
    return zlib.decompress(compressed).decode("utf-8")


# Change the following tuple manually any time the templates have changed
# to a point that all of the screenshots need to be re-taken
# https://github.com/simonw/til/issues/82
16+ SHOT_HASH_ELEMENTS = (
17+ # Compressed HTML from the last time this ran against the actual templates
18+ # Delete this entirely - and the import zlib line - the first time
19+ # SHOT_HASH_ELEMENTS needs to be manually invalidated.
20+ _decompress (
21+ b"x\x9c \xb5 VQo\xdb 6\x10 ~\xf7 \xaf \xb8 *\xc8 d\xaf \xae \x94 \xd6 \xd9 V\xb8 \xb6 "
22+ b"\xb0 \xa0 \r \xb0 \x01 \xc5 \x1e \x96 \x02 {(\x06 \x83 \x16 i\x93 +E\xaa $\xd5 \xcc u\x0c "
23+ b"\xec o\xec \xef \xed \x97 \xec HJ\x8e \xed \xc4 \xee \x86 ay\x88 \xa4 \xe3 w"
24+ b"\xdf \x1d \xef \x8e \x1f \xbd >\x07 \xf6 \xbb c\x8a ZH\x9c \x90 \xb3 9\xb1 ,\xe3 \xae \x92 \t "
25+ b"\x9c oz\xbd \xf5 9X\xe6 \x00 W,L\xc1 ~\x94 \xfd $Iz\x80 \x7f \x96 IV:\xf8 \x1a \x16 "
26+ b"FW\x1e \x00 \xb7 \x9c \x19 \x06 5q\x1c \xa1 c\xa7 kQ\xc2 \xdd \x1d \xa4 \xb3 \xd4 ?\xc6 "
27+ b"V6\xcb \xf0 \x9d U4\xed !\xcd \x10 \xd6 I@%c\x08 \xcf !$\x1e \x84 \x9f \xfe \xb1 \x19 \x02 %"
28+ b"\x8e \xf8 \x84 \xa6 >5\x9b \x0c z\x98 \xd2 }F\x18 \xc5 \x9b \xdf _\xfc \n \xd1 .\x16 \xa0 "
29+ b"t\x9b ,Z|\x96 -\xda \x10 a\x19 E\x87 \xf0 2\xbb \xbc \xb8 \xec '\xef ~|\x1b \xe0 \x0b \xdd ("
30+ b"\x9a \x0c Z\x0e \xac \x04 \xd2 \xb4 [\x9f K]~@>'\x19 \x9a \xd6 kO\x9d \xc5 \xcf "
31+ b"\xcd \x06 \xee \xe0 FTZ\xc1 /BJa\xb5 \xfa \xeb \x8f ?- \xab \x8d 4\xd1 y\x8f \t +m"
32+ b'\xc8 \x8c 3B\xbd }"\x85 \xfa \x00 \x86 \xc9 ib\xdd J2\xcb \x19 s\t p\xc3 \x16 \xd3 \x84 ;W'
33+ b"\xdb q\x9e \xfb \x88 \xd6 \x87 \xb9 m\xa3 d\x8a \xb9 \xdc :\xe2 D\x99 /\x85 "
34+ b"\xe3 \xcd \xfc \x99 \x14 K\xee \xb2 \xd2 \xda \xa4 \xe8 M*\xe6 \x08 (R\xf9 \xa2 "
35+ b"\xdd \n \xe7 \x98 \x19 \x97 \xc4 \xd0 \x04 J\xad \xb0 \xcf \x0e \x83 5UE\xcc j&\x89 Y\xb2 "
36+ b"\x99 \xa8 \xc8 \x92 \x1d s4\x8c 8mv|\xbf \x8f \xb9 \x1c \xc1 \x87 \xd2 \xec \xa0 \x0f *v"
37+ b"\xc4 \x8b 2[\x1a Q;\xa1 \xd5 C\xdf 6\xd9 \xe3 \xde 1\xff {\xbf \xae pv\x94 \x91 "
38+ b"\x8a |\xd6 \x8a \xdc \xda \xac \xd4 \xd5 \x91 Jva\xb8 v3N,\xc7 @\xd9 o\xf5 \xf2 T\xb0 1\x91 "
39+ b"n'\xe0 \r V\x89 )O\x00 z\x01 \x8e \x0b \x0b \xc4 `w\xe4 }Uk\xa3 kf\xdc j\x9a \xe8 "
40+ b"\xe5 \xb8 1\xf2 \x91 tOf\x17 \xcf \xd2 f\xb3 \xcd \xd6 \x1f \xa5 \x9d \x8a \xec "
41+ b"\xf1 \xbb U\xbd [\x8f \x93 \xb9 \xfc \xd3 \x86 \xed 9\xfd \xdb ~\xed 9\xff \xcf "
42+ b"\xed z\x18 \xeb ?v+r\xdc \n \xea \xf8 \x0e \xcb \xcb \x8b \x8b Sp\xce \xfc \x89 \xdc \xc1 "
43+ b"_\x06 \xfc Q]\x98 k\xba \n \x8a \xc0 \x9f \x17 \x07 -\x98 \xe4 hCh\xb4 za\xbe \xb3 d\xe1 W"
44+ b"\xb6 \xe2 \x8c \n B\\ T\xb7 \xf8 6\xf3 \n \xd8 \xc7 \x7f \x83 {a\xec @A\x17 '|T\xfc \x1c \r "
45+ b"\xc8 ?*\xbc \xa9 \x91 PJb\xed 4i\xa1 I\xd1 )\xe8 B\x9b \xa0 \xb7 B\xed \xd3 "
46+ b"\xa0 \x97 \x14 \xc5 \xc4 \xd6 Du\xbe Q\xcf \x8b \x83 \xb9 \x9d \xe4 \x1e S\xc0 \x84 "
47+ b"\xb4 \xea \xf6 \xc5 \xc1 ~P\x05 R\xc0 3h\xad A\x96 \x18 }?~\x8e \xd2 \xef \x17 1\x8b .W,"
48+ b"\xb0 O7n3od\xb1 /\xea \x93 \xba \xcb \xb4 %I\x8a \xd7 \xf1 \xe5 \x80 \x1c yc\xe1 vmO"
49+ b"\xc2 \x8d \x93 55m\x8b 0\x84 \xee \xbd \xf5 \xee >\xf1 z\xf8 \xaa \x12 \x94 j\xf7 \xea ~"
50+ b"\xd7 \x1d \xc6 \xc8 ;\xc3 jIJ\xd6 \xdf \xce \x7f \x94 \xf2 0\xf9 q\xea \xfd \x01 \xc8 q<"
51+ b"\xe6 9^\x94 _\x80 \xe1 g%\x9c \xcd \xf1 \x16 \xf3 \xc5 \xfb AXT\xed \x95 /\xda \xce "
52+ b"\xe6 O&\x14 \xfc \xae \xa9 p\xde i\x92 \xd7 8r\x93 x\xca \x8b ^\x9e \xc3 \x15 \xa5 \xf0 "
53+ b"IX1\xc7 n\x9c \x81 \xbf \xb7 ,^\xd9 @\xa4 \x04 \xfe \xe2 )\xf8 \x1b M\xa8 \xa5 \x05 \xe8 Q]6"
54+ b"\x15 \xce |\xf6 \xb1 afu\x13 ~$hs%e?\xe5 /\x86 |4\xe4 \x97 C\xfe \xcd \x90 \x7f "
55+ b"\x9b \x0e 2l\xd4 5)y\x9f \xe1 U^\xc0 \x1a ;&q\x9c \x89 *9vp\n L\xee \x93 \xf4 "
56+ b"S\x92 \xc5 \xc5 t\xf0 \n \xc1 \xb8 \xab \xfe \x93 h\x18 \x04 o\xc0 \xf1 t\x8d Q~q\xd3 "
57+ b'\xb2 \t \x7f ."([2w\xe5 \x9c \x11 \xf3 \xc6 \xb1 ~*h\xa4 \xc1 0\x86 U\xfa \x13 {\xcd \x85 '
58+ b"\xa4 \xfd \x96 \xb0 ]\xb1 \x87 .Cd\x0c \x8b \x9e \xdc +\xd0 [\x7f \x89 Oa\xbb \xef 8"
59+ b"+\xd7 \x92 \xf9 /L9\x06 \xe9 \x90 Y\xb8 \xe9 3\x87 \xbf \x04 \xde \xb0 R\x1b \xe2 U\x14 "
60+ b"\xdd S\xa5 \x15 K\x1f A\x96 Z\x86 b\xa4 g\xf3 \xef \xe6 \xa3 \xf9 \xe8 1\xcc \x02 e\xe6 "
61+ b"F|f\x1e v\x91 \xbd d\xd5 \x01 ho\x0f \xbe \xf3 \xb8 \x8b \xf4 ,\x85 \xa7 \xdd ^\xb6 "
62+ b"P\xa1 \x14 3\xef 0\xb9 \x10 1m\x8b @\xea \x1a \x87 (\x96 \xe7 `\x9f \x1e \xfb \x93 \xa6 "
63+ b"\xc8 \x0b \xe9 p\xb5 \xd3 \xe2 8qaP\x1e Rt\xd1 \x10 \xbe \x19 \xf4 P\x1b \xda 9"
64+ b";\x90 \xc7 \xbf \x01 *r\x94 d"
65+ ),
66+ _decompress (
67+ b"x\x9c }U\xeb \x8e \xe3 4\x14 \xfe \xdf \xa7 0A\xd5 NE.\xed t\xa6 \x9d \xc9 4\x15 \x88 E,"
68+ b"\x12 \x02 \x04 #!~!\xd7 >m\xbc \xe3 \xd8 \xc1 vo\xbb \xaa \xc4 k\xf0 z<\t \xc7 q\xd2 M"
69+ b"\x99 \xd9 i\xd5 \xa6 >=\xe7 ;\xdf \xb9 z\xf1 \xc5 \xdb \x9f \xbf }\xfc \xe3 \x97 \xef H\xe9 "
70+ b"*\xb9 \x1c ,\xfc \x83 H\xaa 6E\x04 *\xf2 \x02 \xa0 \x1c \x1f \x15 8JXI\x8d \x05 WD[\xb7 "
71+ b"N\xee \xa2 N\xac h\x05 E\xb4 \x13 \xb0 \xaf \xb5 q\x11 aZ9P\xa8 \xb6 \x17 \xdc \x95 "
72+ b"\x05 \x87 \x9d `\x90 4\x87 \x98 \x08 %\x9c \xa0 2\xb1 \x8c J(&\x1e \xc4 \t 'a\xf9 qHVR"
73+ b"\xb3 '\xd2 \x1c \xc9 \xf0 \x84 \x02 P<\xc8 \x86 \xa7 E\x16 \xd4 \x06 \x0b \xcb \x8c "
74+ b"\xa8 \x1d \xe1 \xb0 \x06 C8u4\xe1 \xba \xa2 B\x15 \x91 \x13 2\xb5 \xa2 \xd2 j/\xa4 \x14 "
75+ b"V\xab T\x01 \xf2 \xb1 \x86 \x15 Q\xe9 \\ m\xf3 ,\xab %\xdd Z\xb1 \x92 \x90 \n \x9d \xbd "
76+ b"\xb7 \xbd \xf3 {\x1b -\x17 Y@G7R\xa8 'b@\x16 \x11 \x95 \x0e \x8c \xa2 \x0e \" "
77+ b'\xe2 \x8e 5\x86 J\xeb Z\n F\x9d \xd0 *\xa3 NW_\x1d *\x19 \x05 \xde E\xf4 \r \n "R\x1a '
78+ b"X\x17 \x11 r\x96 6[\x03 \xf0 \xd4 \xeb a\xb0 \xe7 (\xe1 \xe0 \x0c \xfd \xd3 g\xf7 Y\xa8 "
79+ b"\x18 \xa2 ;\xfa PW\x9a \x1f \xc9 \xc7 \x01 \xc1 \xd7 \x1a s\x9a \xac i%\xe4 1'\xd1 ;\x90 ;pH"
80+ b"\x81 \xfc \x04 [\x88 bRv\x82 \x98 X\xaa lb\xc1 \x88 \xf5 Cc\x89 q@R\x82 \xd8 \x94 .'"
81+ b"\x93 \xf4 &H+j6B\xe5 d\x1c \x8e 5\xe5 \\ \xa8 Ms>\r \xca \xc9 K^\xbf \x07 \x8d 6"
82+ b"\xe8 \xe1 \xcd \xa3 \xa8 \xc0 \xa2 \xeb =\xf9 \x15 3\xaf \xde \xc4 \xa4 \x91 \xa0 \xef \xe0 "
83+ b"\xf6 4H\x99 \x01 L\x18 oqV\xda p0\x89 \xd3 5R\xa8 \x0f \xc4 j)8\xf9 \x92 1v\xe1 "
84+ b"\xbe U\x80 \xea \xe1 \x93 w+>\x00 \xd2 J\xef \xbc \x14 \x81 QE\xb0 \x0e \x96 \xb2 \xa7 \x8d "
85+ b"\xd1 [\xc5 \x13 \xa6 \xa5 6\xf9 s\xc8 \x9c L\xeb \xc3 C\x8f \xc4 \x05 \x81 \xf9 "
86+ b"|\xfe \x82 \xab \xd9 m\xc7 \xa0 \xd6 V\xf8 *\xe7 \xbe \x11 \xb0 \xde ;\x08 \xf2 \x86 g2\xf1 "
87+ b"\xc8 gJ\xb4 %\xe5 \xb0 \xb2 \t \x07 \xa6 \r \r \xa6 J\xab \xd6 \xac %\xb9 \x92 \xc8 \xdb ["
88+ b"\xd6 \x06 Z\xa3 })\x1c $\xb6 \xa6 \x0c \x19 \xa0 8\xd9 \x1b Z{\x15 Ew}\\ *\xc5 \x06 !%"
89+ b"\xac \xdd \xc3 \xff R\x90 7\x95 \xa6 &\xd9 \x18 \xca \x05 \xce \xdf \x95 \xd3 \x18 \xb3 "
90+ b"\xc3 \xbe \x8b \x89 \xd9 \xac \xae &\xb7 71\x99 \x8c \xa7 \xf8 5\xbf \x1d \x91 \xf1 "
91+ b"0\x88 \xef g1\x99 _\xa3 \xf0 \xfa ~Dn\xee [\xe9 d<\x8e \xc9 l\x8e \xe2 \xe9 x\x84 V\xe3 "
92+ b'\xe1 \xe8 "\x86 \x86 pG\xb0 n)raq\x9c \x8e yh\xf1 \xd7 \x1b \xed \x06 \xab 0\xc6 \x8f \x7f ^'
93+ b"\x87 \xda z(\x9a \xfb \xb9 \x8b \xdb \xdf ;\x81 \xf9 \x07 \xde \x1d K\xbd \x03 \xd3 "
94+ b"\x1d \xd6 \x9a mmw\xa0 \xcc \x17 \xa7 \xa5 qI\xf1 \x95 \x9a \x9c \x06 \x16 \x98 "
95+ b"\x17 \xa4 \xbd I\xfb 4\n )\xf6 A\xe0 \x16 \x02 9\x84 \x1d \x96 \x93 \xbb \xf1 8\x94 \xfe \xeb "
96+ b"\n \xb8 \xa0 \xe4 \xaa \xf7 \xdf \xcc \xff 7j\xb1 ^\x80 \xbf t\x81 \x0e \xce \xed ~j\xbe \xfb "
97+ b"\xf9 \xfc l\xc2 z6\xa7 \x01 N$vL\x8c a\xf3 .\x03 >Ok\xa9 \xf7 M\x1b a9p\x18 "
98+ b"\x9f \x92 =\xf6 \xbf '\xed (n\xbb \xcb \xc1 \xc4 \x8c IZ[\xec \xbd \xee W\xa3 \x88 \xeb "
99+ b"\xda \xf1 \xe7 i\x99 v\xde _\x98 \xa8 \xb7 S\xff \x0e \xd6 \x9f \x9f R\x80 \xc6 "
100+ b"\xc1 V\xa6 \xcd \\ \x9d \xf7 \x04 .m\x1c C\xbf \xfc \x12 \xbf i\xfb \xb3 \xf3 "
101+ b"\xea \xc2 \xea \x01 I\xd1 b\x05 \x83 $L\xc0 \x99 \xf6 i\xd0 \xf4 \xe6 _[\xed \xe0 B\xb1 "
102+ b"\xd9 ;\x1d z\x9b \x16 ?f\xde r\xee \x1b \xa1 \r \xf0 \x1e f\xab \xd5 \xed 5\\ .\xae "
103+ b'\xbe \xa6 w\x82 \xf7 HX\xe1 \x8b \xac \xbd @}\x0b \xe0 \x03 \xcb \xbb l,\x17 \xf5 rA\xbb k"Z'
104+ b"\xfe \xe6 \xaf -\xf2 {{o\xfd \xfb \xf7 ?\x96 <\xfe \xf0 \xa3 ]d\x14 \xef \xa4 \xda \xe3 "
105+ b'4\x96 \x8b \xb6 \xa3 \x08 \x93 \xd4 \xda "\xf2 \xa8 \xfd [\xa5 i\xb4 \xe7 \xf7 I'
106+ b"\xd6 \x9a y\x9c \x96 H\x16 .\xfc \xff \x00 W\x90 y)"
107+ ),
108+ )
14109
15- def png_for_path (path ):
110+
def s3_contents():
    """Return the keys of the objects in the til.simonwillison.net bucket.

    Runs ``s3-credentials list-bucket`` and parses its standard output as a
    JSON array of objects, collecting each object's ``"Key"`` value.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status. Without this check a failed listing would only show up
            as a JSON decoding error on the empty output.
    """
    proc = subprocess.run(
        ["s3-credentials", "list-bucket", "til.simonwillison.net"],
        capture_output=True,
        check=True,
    )
    return [item["Key"] for item in json.loads(proc.stdout)]
116+
117+
def jpeg_for_path(path):
    """Render the page at *path* and return a screenshot of it as bytes.

    The page HTML is produced with ``datasette . --get <path>`` and saved to
    a scratch file under TMP_PATH. ``shot-scraper shot`` then captures that
    file at 800x400 with ``--retina`` and ``--quality 60``, writing the image
    to its standard output (``-o -``), which is what gets returned.

    Neither subprocess's exit status is checked, so a failed run yields
    whatever the tool wrote to stdout (possibly nothing).
    """
    page_html = TMP_PATH / "generate-screenshots-page.html"
    # Use datasette to generate HTML
    proc = subprocess.run(["datasette", ".", "--get", path], capture_output=True)
    # write_bytes() opens, writes and closes the file in one step
    page_html.write_bytes(proc.stdout)
    # Now use shot-scraper to generate a JPEG (selected by passing --quality)
    proc2 = subprocess.run(
        [
            "shot-scraper",
            "shot",
            str(page_html),
            "-w",
            "800",
            "-h",
            "400",
            "--retina",
            "--quality",
            "60",
            "-o",
            "-",
        ],
        capture_output=True,
    )
    return proc2.stdout
34142
35143
def generate_screenshots(root):
    """Generate and upload a screenshot for every TIL that needs a new one.

    Opens ``tils.db`` inside *root* and walks the rows of its ``til`` table.
    Each row gets a "shot hash": the MD5 of a template fingerprint (built
    from SHOT_HASH_ELEMENTS) concatenated with the row's ``html``. A row is
    re-shot when that hash differs from its stored ``shot_hash`` or when
    ``<shot_hash>.jpg`` is not among the keys returned by s3_contents().
    Re-shot rows have ``shot_hash`` updated in the database and the image
    uploaded with ``s3-credentials put-object``. One progress line is
    printed per row.

    Args:
        root: directory (path-like supporting ``/``) that contains tils.db.
    """
    db = sqlite_utils.Database(root / "tils.db")

    # If the old 'shot' column exists, drop it
    if "shot" in db["til"].columns_dict:
        db["til"].transform(drop=["shot"])

    # shot_hash incorporates a hash of key templates
    template_hasher = hashlib.md5()
    for element in SHOT_HASH_ELEMENTS:
        template_hasher.update(element.encode("utf-8"))
    shot_html_hash = template_hasher.hexdigest()

    # Build the set once so the per-row membership test below is O(1)
    s3_keys = set(s3_contents())

    for row in db["til"].rows:
        path = row["path"]
        html = row["html"]
        shot_hash = hashlib.md5((shot_html_hash + html).encode("utf-8")).hexdigest()
        shot_filename = "{}.jpg".format(shot_hash)

        # Up to date: the hash is unchanged and the image is already in the bucket
        if shot_hash == row.get("shot_hash") and shot_filename in s3_keys:
            print("Skipped {} with shot hash {}".format(path, shot_hash))
            continue

        jpeg = jpeg_for_path("/{}/{}".format(row["topic"], row["slug"]))
        db["til"].update(path, {"shot_hash": shot_hash}, alter=True)
        # Store it to S3; the image bytes are fed to the command on stdin ("-")
        subprocess.run(
            [
                "s3-credentials",
                "put-object",
                "til.simonwillison.net",
                shot_filename,
                "-",
                "--content-type",
                "image/jpeg",
                "--silent",
            ],
            input=jpeg,
        )
        print(
            "Stored {} byte JPEG for {} shot hash {}".format(
                len(jpeg), path, shot_hash
            )
        )
59189
if __name__ == "__main__":
    # Script entry point: process the database that sits next to this file
    generate_screenshots(root)
0 commit comments