Skip to content

Commit a26b687

Browse files
committed
use stdin and stdout not named temporary files
1 parent 4d4629e commit a26b687

File tree

3 files changed

+30
-32
lines changed

3 files changed

+30
-32
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ env
1313
.cache/
1414
benchmark/pdf_cache/
1515
benchmark/output/
16+
htmlcov/

pydf/wkhtmltopdf.py

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,22 @@
11
import re
22
import subprocess
33
from pathlib import Path
4-
from tempfile import NamedTemporaryFile
54

65
from .version import VERSION
76

87
THIS_DIR = Path(__file__).parent.resolve()
98
WK_PATH = THIS_DIR / 'bin' / 'wkhtmltopdf'
109

1110

12-
def execute_wk(*args):
11+
def execute_wk(*args, input=None):
1312
"""
1413
Generate path for the wkhtmltopdf binary and execute command.
1514
1615
:param args: args appended after the wkhtmltopdf binary path and passed straight to subprocess.run
1716
:return: the subprocess.CompletedProcess from subprocess.run, exposing stdout, stderr and returncode
1817
"""
1918
wk_args = (str(WK_PATH),) + args
20-
p = subprocess.Popen(wk_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
21-
stdout, stderr = p.communicate()
22-
return stdout, stderr, p.returncode
19+
return subprocess.run(wk_args, input=input, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2320

2421

2522
def generate_pdf(source, *,
@@ -30,7 +27,6 @@ def generate_pdf(source, *,
3027
producer=None,
3128
# from here on arguments are passed via the commandline to wkhtmltopdf
3229
cache_dir=None,
33-
quiet=True,
3430
grayscale=False,
3531
lowquality=False,
3632
margin_bottom=None,
@@ -60,7 +56,6 @@ def generate_pdf(source, *,
6056
and None arguments are missed, everything else is passed with str(value).
6157
6258
:param source: html string to generate pdf from or url to get
63-
:param quiet: bool
6459
:param grayscale: bool
6560
:param lowquality: bool
6661
:param margin_bottom: string eg. 10mm
@@ -77,11 +72,10 @@ def generate_pdf(source, *,
7772
:return: bytes containing the generated pdf
7873
"""
7974
if source.lstrip().startswith(('http', 'www')):
80-
raise RuntimeError('pdf generation from urls is not supported')
75+
raise ValueError('pdf generation from urls is not supported')
8176

8277
py_args = dict(
8378
cache_dir=cache_dir,
84-
quiet=quiet,
8579
grayscale=grayscale,
8680
lowquality=lowquality,
8781
margin_bottom=margin_bottom,
@@ -106,6 +100,18 @@ def generate_pdf(source, *,
106100
else:
107101
cmd_args.extend([arg_name, str(value)])
108102

103+
# read from stdin and write to stdout
104+
cmd_args += ['-', '-']
105+
106+
p = execute_wk(*cmd_args, input=source.encode())
107+
pdf_bytes = p.stdout
108+
109+
# it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
110+
# seem to have generated a pdf
111+
if p.returncode != 0 and pdf_bytes[:4] != b'%PDF':
112+
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
113+
'response: "{}"'.format(cmd_args, p.stderr.strip()))
114+
109115
fields = [
110116
('Title', title),
111117
('Author', author),
@@ -114,29 +120,13 @@ def generate_pdf(source, *,
114120
('Producer', producer),
115121
]
116122
metadata = '\n'.join(f'/{name} ({value})' for name, value in fields if value)
117-
118-
with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
119-
html_file.write(source.encode())
120-
html_file.flush()
121-
html_file.seek(0)
122-
with NamedTemporaryFile(suffix='.pdf', mode='rb') as pdf_file:
123-
cmd_args += [html_file.name, pdf_file.name]
124-
_, stderr, returncode = execute_wk(*cmd_args)
125-
pdf_file.seek(0)
126-
pdf_bytes = pdf_file.read()
127-
# it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
128-
# seem to have generated a pdf
129-
if returncode != 0 and pdf_bytes[:4] != b'%PDF':
130-
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
131-
'response: "{}"'.format(cmd_args, stderr.strip()))
132-
133-
if metadata:
134-
pdf_bytes = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_bytes, count=1)
135-
return pdf_bytes
123+
if metadata:
124+
pdf_bytes = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_bytes, count=1)
125+
return pdf_bytes
136126

137127

138128
def _string_execute(*args):
139-
return execute_wk(*args)[0].decode().strip(' \n')
129+
return execute_wk(*args).stdout.decode().strip(' \n')
140130

141131

142132
def get_version():

tests/test_main.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from io import BytesIO, StringIO
22

3-
import pytest
43
import pdfminer.layout
4+
import pytest
55
from pdfminer import high_level
66

77
from pydf import generate_pdf, get_extended_help, get_help, get_version
@@ -82,9 +82,16 @@ def test_extra_kwargs():
8282
assert pdf_content[:4] == b'%PDF'
8383

8484

85+
def test_generate_url():
86+
with pytest.raises(ValueError) as exc_info:
87+
generate_pdf('www.google.com')
88+
assert 'pdf generation from urls is not supported' in str(exc_info)
89+
90+
8591
def test_bad_arguments():
86-
with pytest.raises(RuntimeError):
87-
generate_pdf('www.')
92+
with pytest.raises(RuntimeError) as exc_info:
93+
generate_pdf('hellp', foobar='broken')
94+
assert 'error running wkhtmltopdf, command' in str(exc_info)
8895

8996

9097
def test_get_version():

0 commit comments

Comments
 (0)