11import re
22import subprocess
33from pathlib import Path
4- from tempfile import NamedTemporaryFile
54
65from .version import VERSION
76
# Absolute directory that contains this module.
THIS_DIR = Path(__file__).parent.resolve()
# Location of the wkhtmltopdf binary: <module dir>/bin/wkhtmltopdf.
WK_PATH = THIS_DIR / 'bin' / 'wkhtmltopdf'
109
1110
def execute_wk(*args, input=None):
    """
    Generate path for the wkhtmltopdf binary and execute command.

    :param args: command line arguments, appended after the binary path and
        passed straight to subprocess.run
    :param input: optional bytes to send to the process's stdin
    :return: subprocess.CompletedProcess; stdout and stderr are captured as
        bytes, the exit status is available as returncode
    """
    # the binary path is always the first element of the command
    wk_args = (str(WK_PATH),) + args
    return subprocess.run(wk_args, input=input, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2320
2421
2522def generate_pdf (source , * ,
@@ -30,7 +27,6 @@ def generate_pdf(source, *,
3027 producer = None ,
3128 # from here on arguments are passed via the commandline to wkhtmltopdf
3229 cache_dir = None ,
33- quiet = True ,
3430 grayscale = False ,
3531 lowquality = False ,
3632 margin_bottom = None ,
@@ -60,7 +56,6 @@ def generate_pdf(source, *,
6056 and None arguments are missed, everything else is passed with str(value).
6157
6258 :param source: html string to generate pdf from or url to get
63- :param quiet: bool
6459 :param grayscale: bool
6560 :param lowquality: bool
6661 :param margin_bottom: string eg. 10mm
@@ -77,11 +72,10 @@ def generate_pdf(source, *,
7772 :return: string representing pdf
7873 """
7974 if source .lstrip ().startswith (('http' , 'www' )):
80- raise RuntimeError ('pdf generation from urls is not supported' )
75+ raise ValueError ('pdf generation from urls is not supported' )
8176
8277 py_args = dict (
8378 cache_dir = cache_dir ,
84- quiet = quiet ,
8579 grayscale = grayscale ,
8680 lowquality = lowquality ,
8781 margin_bottom = margin_bottom ,
@@ -106,6 +100,18 @@ def generate_pdf(source, *,
106100 else :
107101 cmd_args .extend ([arg_name , str (value )])
108102
103+ # read from stdin and write to stdout
104+ cmd_args += ['-' , '-' ]
105+
106+ p = execute_wk (* cmd_args , input = source .encode ())
107+ pdf_bytes = p .stdout
108+
109+ # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
110+ # seem to have generated a pdf
111+ if p .returncode != 0 and pdf_bytes [:4 ] != b'%PDF' :
112+ raise RuntimeError ('error running wkhtmltopdf, command: {!r}\n '
113+ 'response: "{}"' .format (cmd_args , p .stderr .strip ()))
114+
109115 fields = [
110116 ('Title' , title ),
111117 ('Author' , author ),
@@ -114,29 +120,13 @@ def generate_pdf(source, *,
114120 ('Producer' , producer ),
115121 ]
116122 metadata = '\n ' .join (f'/{ name } ({ value } )' for name , value in fields if value )
117-
118- with NamedTemporaryFile (suffix = '.html' , mode = 'wb' ) as html_file :
119- html_file .write (source .encode ())
120- html_file .flush ()
121- html_file .seek (0 )
122- with NamedTemporaryFile (suffix = '.pdf' , mode = 'rb' ) as pdf_file :
123- cmd_args += [html_file .name , pdf_file .name ]
124- _ , stderr , returncode = execute_wk (* cmd_args )
125- pdf_file .seek (0 )
126- pdf_bytes = pdf_file .read ()
127- # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
128- # seem to have generated a pdf
129- if returncode != 0 and pdf_bytes [:4 ] != b'%PDF' :
130- raise RuntimeError ('error running wkhtmltopdf, command: {!r}\n '
131- 'response: "{}"' .format (cmd_args , stderr .strip ()))
132-
133- if metadata :
134- pdf_bytes = re .sub (b'/Title.*\n .*\n /Producer.*' , metadata .encode (), pdf_bytes , count = 1 )
135- return pdf_bytes
123+ if metadata :
124+ pdf_bytes = re .sub (b'/Title.*\n .*\n /Producer.*' , metadata .encode (), pdf_bytes , count = 1 )
125+ return pdf_bytes
136126
137127
def _string_execute(*args):
    """
    Run wkhtmltopdf with the given arguments and return its stdout as a string.

    :param args: command line arguments passed on to execute_wk
    :return: captured stdout, decoded and with leading/trailing spaces and
        newlines removed
    """
    return execute_wk(*args).stdout.decode().strip(' \n')
140130
141131
142132def get_version ():
0 commit comments