Use stdin and stdout instead of named temporary files

samuelcolvin · samuelcolvin · commit a26b6871aed8 · 2017-05-23T21:34:53.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@ env
 .cache/
 benchmark/pdf_cache/
 benchmark/output/
+htmlcov/
diff --git a/pydf/wkhtmltopdf.py b/pydf/wkhtmltopdf.py
@@ -1,25 +1,22 @@
 import re
 import subprocess
 from pathlib import Path
-from tempfile import NamedTemporaryFile
 
 from .version import VERSION
 
 THIS_DIR = Path(__file__).parent.resolve()
 WK_PATH = THIS_DIR / 'bin' / 'wkhtmltopdf'
 
 
-def execute_wk(*args):
+def execute_wk(*args, input=None):
     """
     Generate path for the wkhtmltopdf binary and execute command.
 
     :param args: args to pass straight to subprocess.Popen
     :return: stdout, stderr
     """
     wk_args = (str(WK_PATH),) + args
-    p = subprocess.Popen(wk_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = p.communicate()
-    return stdout, stderr, p.returncode
+    return subprocess.run(wk_args, input=input, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
 
 def generate_pdf(source, *,
@@ -30,7 +27,6 @@ def generate_pdf(source, *,
                  producer=None,
                  # from here on arguments are passed via the commandline to wkhtmltopdf
                  cache_dir=None,
-                 quiet=True,
                  grayscale=False,
                  lowquality=False,
                  margin_bottom=None,
@@ -60,7 +56,6 @@ def generate_pdf(source, *,
     and None arguments are missed, everything else is passed with str(value).
 
     :param source: html string to generate pdf from or url to get
-    :param quiet: bool
     :param grayscale: bool
     :param lowquality: bool
     :param margin_bottom: string eg. 10mm
@@ -77,11 +72,10 @@ def generate_pdf(source, *,
     :return: string representing pdf
     """
     if source.lstrip().startswith(('http', 'www')):
-        raise RuntimeError('pdf generation from urls is not supported')
+        raise ValueError('pdf generation from urls is not supported')
 
     py_args = dict(
         cache_dir=cache_dir,
-        quiet=quiet,
         grayscale=grayscale,
         lowquality=lowquality,
         margin_bottom=margin_bottom,
@@ -106,6 +100,18 @@ def generate_pdf(source, *,
         else:
             cmd_args.extend([arg_name, str(value)])
 
+    # the two '-' arguments make wkhtmltopdf read html from stdin and write the pdf to stdout
+    cmd_args += ['-', '-']
+
+    p = execute_wk(*cmd_args, input=source.encode())
+    pdf_bytes = p.stdout
+
+    # wkhtmltopdf sometimes exits non-zero even though it produced a pdf, so a
+    # non-zero return code is only an error if the output lacks the %PDF header
+    if p.returncode != 0 and pdf_bytes[:4] != b'%PDF':
+        raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
+                           'response: "{}"'.format(cmd_args, p.stderr.strip()))
+
     fields = [
         ('Title', title),
         ('Author', author),
@@ -114,29 +120,13 @@ def generate_pdf(source, *,
         ('Producer', producer),
     ]
     metadata = '\n'.join(f'/{name} ({value})' for name, value in fields if value)
-
-    with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
-        html_file.write(source.encode())
-        html_file.flush()
-        html_file.seek(0)
-        with NamedTemporaryFile(suffix='.pdf', mode='rb') as pdf_file:
-            cmd_args += [html_file.name, pdf_file.name]
-            _, stderr, returncode = execute_wk(*cmd_args)
-            pdf_file.seek(0)
-            pdf_bytes = pdf_file.read()
-            # it seems wkhtmltopdf's error codes can be false, we'll ignore them if we
-            # seem to have generated a pdf
-            if returncode != 0 and pdf_bytes[:4] != b'%PDF':
-                raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
-                                   'response: "{}"'.format(cmd_args, stderr.strip()))
-
-            if metadata:
-                pdf_bytes = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_bytes, count=1)
-            return pdf_bytes
+    if metadata:
+        pdf_bytes = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_bytes, count=1)
+    return pdf_bytes
 
 
 def _string_execute(*args):
-    return execute_wk(*args)[0].decode().strip(' \n')
+    return execute_wk(*args).stdout.decode().strip(' \n')
 
 
 def get_version():
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -1,7 +1,7 @@
 from io import BytesIO, StringIO
 
-import pytest
 import pdfminer.layout
+import pytest
 from pdfminer import high_level
 
 from pydf import generate_pdf, get_extended_help, get_help, get_version
@@ -82,9 +82,16 @@ def test_extra_kwargs():
     assert pdf_content[:4] == b'%PDF'
 
 
+def test_generate_url():
+    with pytest.raises(ValueError) as exc_info:
+        generate_pdf('www.google.com')
+    assert 'pdf generation from urls is not supported' in str(exc_info)
+
+
 def test_bad_arguments():
-    with pytest.raises(RuntimeError):
-        generate_pdf('www.')
+    with pytest.raises(RuntimeError) as exc_info:
+        generate_pdf('hellp', foobar='broken')
+    assert 'error running wkhtmltopdf, command' in str(exc_info)
 
 
 def test_get_version():