1+ import asyncio
12import re
23import subprocess
4+
35from pathlib import Path
46
57from .version import VERSION
68
# public api of this module, helpers prefixed with "_" are deliberately left out
__all__ = [
    'AsyncPydf',
    'generate_pdf',
    'get_version',
    'get_help',
    'get_extended_help',
]
16+
# directory containing this module, the wkhtmltopdf binary is looked up in ./bin
THIS_DIR = Path(__file__).parent.resolve()
# kept as a str so it can be used directly as the first item of a subprocess command
WK_PATH = str(THIS_DIR / 'bin' / 'wkhtmltopdf')
919
1020
def _execute_wk(*args, input=None):
    """
    Run the wkhtmltopdf binary at WK_PATH and wait for it to finish.

    :param args: command line arguments appended after the path of the binary
    :param input: optional data written to the stdin of the process
    :return: subprocess.CompletedProcess with stdout and stderr captured
    """
    # str() is a no-op when WK_PATH is already a string and keeps this working with a Path
    wk_args = (str(WK_PATH),) + args
    return subprocess.run(wk_args, input=input, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2030
2131
22- def generate_pdf (source , * ,
32+ def _convert_args (py_args ):
33+ cmd_args = []
34+ for name , value in py_args .items ():
35+ if value in {None , False }:
36+ continue
37+ arg_name = '--' + name .replace ('_' , '-' )
38+ if value is True :
39+ cmd_args .append (arg_name )
40+ else :
41+ cmd_args .extend ([arg_name , str (value )])
42+
43+ # read from stdin and write to stdout
44+ cmd_args .extend (['-' , '-' ])
45+ return cmd_args
46+
47+
48+ def _set_meta_data (pdf_content , ** kwargs ):
49+ fields = [
50+ ('Title' , kwargs .get ('title' )),
51+ ('Author' , kwargs .get ('author' )),
52+ ('Subject' , kwargs .get ('subject' )),
53+ ('Creator' , kwargs .get ('creator' )),
54+ ('Producer' , kwargs .get ('producer' )),
55+ ]
56+ metadata = '\n ' .join (f'/{ name } ({ value } )' for name , value in fields if value )
57+ if metadata :
58+ pdf_content = re .sub (b'/Title.*\n .*\n /Producer.*' , metadata .encode (), pdf_content , count = 1 )
59+ return pdf_content
60+
61+
class AsyncPydf:
    """
    Generate pdfs asynchronously by running wkhtmltopdf in asyncio subprocesses.

    A semaphore limits how many wkhtmltopdf processes run at the same time.
    """

    def __init__(self, *, max_processes=20, loop=None):
        """
        :param max_processes: maximum number of wkhtmltopdf processes running concurrently
        :param loop: optional event loop, only forwarded to asyncio when one is given
        """
        self.loop = loop
        # the "loop" argument was removed from asyncio in python 3.10, passing it
        # unconditionally (even as None) raises TypeError there
        self._loop_kwargs = {} if loop is None else {'loop': loop}
        self.semaphore = asyncio.Semaphore(value=max_processes, **self._loop_kwargs)

    async def generate_pdf(self,
                           html,
                           title=None,
                           author=None,
                           subject=None,
                           creator=None,
                           producer=None,
                           **cmd_args):
        """
        Generate a pdf from an html string using wkhtmltopdf.

        :param html: html string to generate pdf from
        :param title: title of the pdf
        :param author: author of the pdf
        :param subject: subject of the pdf
        :param creator: creator of the pdf
        :param producer: producer of the pdf
        :param cmd_args: options for wkhtmltopdf, converted to command line arguments
        :return: bytes of the pdf
        :raises RuntimeError: if wkhtmltopdf exits with a non-zero code and the
            output does not look like a pdf
        """
        # str() is a no-op when WK_PATH is already a string and keeps this working with a Path
        cmd_args = [str(WK_PATH)] + _convert_args(cmd_args)
        async with self.semaphore:
            p = await asyncio.create_subprocess_exec(
                *cmd_args,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                **self._loop_kwargs
            )
            # communicate() writes stdin and reads stdout and stderr while the process
            # runs, calling wait() before reading the pipes deadlocks as soon as the
            # output is bigger than the pipe buffer
            pdf_content, stderr = await p.communicate(html.encode())

        # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
        # seem to have generated a pdf
        if p.returncode != 0 and pdf_content[:4] != b'%PDF':
            raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
                               'response: "{}"'.format(cmd_args, stderr.strip()))

        return _set_meta_data(
            pdf_content,
            title=title,
            author=author,
            subject=subject,
            creator=creator,
            producer=producer,
        )
101+
102+
103+ def generate_pdf (html , * ,
23104 title = None ,
24105 author = None ,
25106 subject = None ,
@@ -55,7 +136,7 @@ def generate_pdf(source, *,
55136 Arguments which are True are passed with no value eg. just --quiet, False
56137 and None arguments are missed, everything else is passed with str(value).
57138
58- :param source : html string to generate pdf from or url to get
139+ :param html : html string to generate pdf from
59140 :param grayscale: bool
60141 :param lowquality: bool
61142 :param margin_bottom: string eg. 10mm
@@ -71,7 +152,7 @@ def generate_pdf(source, *,
71152 :param extra_kwargs: any exotic extra options for wkhtmltopdf
72153 :return: string representing pdf
73154 """
74- if source .lstrip ().startswith (('http' , 'www' )):
155+ if html .lstrip ().startswith (('http' , 'www' )):
75156 raise ValueError ('pdf generation from urls is not supported' )
76157
77158 py_args = dict (
@@ -90,43 +171,29 @@ def generate_pdf(source, *,
90171 image_quality = image_quality ,
91172 )
92173 py_args .update (extra_kwargs )
93- cmd_args = []
94- for name , value in py_args .items ():
95- if value in {None , False }:
96- continue
97- arg_name = '--' + name .replace ('_' , '-' )
98- if value is True :
99- cmd_args .append (arg_name )
100- else :
101- cmd_args .extend ([arg_name , str (value )])
102-
103- # read from stdin and write to stdout
104- cmd_args += ['-' , '-' ]
174+ cmd_args = _convert_args (py_args )
105175
106- p = execute_wk (* cmd_args , input = source .encode ())
107- pdf_bytes = p .stdout
176+ p = _execute_wk (* cmd_args , input = html .encode ())
177+ pdf_content = p .stdout
108178
109179 # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
110180 # seem to have generated a pdf
111- if p .returncode != 0 and pdf_bytes [:4 ] != b'%PDF' :
181+ if p .returncode != 0 and pdf_content [:4 ] != b'%PDF' :
112182 raise RuntimeError ('error running wkhtmltopdf, command: {!r}\n '
113183 'response: "{}"' .format (cmd_args , p .stderr .strip ()))
114184
115- fields = [
116- ('Title' , title ),
117- ('Author' , author ),
118- ('Subject' , subject ),
119- ('Creator' , creator ),
120- ('Producer' , producer ),
121- ]
122- metadata = '\n ' .join (f'/{ name } ({ value } )' for name , value in fields if value )
123- if metadata :
124- pdf_bytes = re .sub (b'/Title.*\n .*\n /Producer.*' , metadata .encode (), pdf_bytes , count = 1 )
125- return pdf_bytes
185+ return _set_meta_data (
186+ pdf_content ,
187+ title = title ,
188+ author = author ,
189+ subject = subject ,
190+ creator = creator ,
191+ producer = producer ,
192+ )
126193
127194
def _string_execute(*args):
    """
    Run wkhtmltopdf with the given arguments and return its stdout as a string.

    :param args: command line arguments passed to wkhtmltopdf
    :return: decoded stdout with leading and trailing spaces and newlines removed
    """
    return _execute_wk(*args).stdout.decode().strip(' \n')
130197
131198
132199def get_version ():
0 commit comments