initial version

samuelcolvin · samuelcolvin · commit 4c4c9cc7f19e · 2014-09-14T19:01:01.000+01:00
diff --git a/README.md b/README.md
@@ -1,4 +1,73 @@
-py-wk-pdf
-=========
+pydf
+====
 
-PDF generation in python using wkhtmltopdf suitable for heroku
+PDF generation in python using [wkhtmltopdf](http://wkhtmltopdf.org/) suitable for Heroku.
+
+Based on [pywkher](https://github.com/jwmayfield/pywkher) but significantly extended.
+
+The repo is pretty large (currently 116 MB) because it contains two binary versions of wkhtmltopdf: one for use on Heroku (Ubuntu 10.04) and one for use on Ubuntu 14.04. See [this discussion](https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1817) for an explanation.
+
+## Basic Usage
+
+    import pydf
+    pdf = pydf.generate_pdf('<h1>this is html</h1>')
+    open('test_doc.pdf', 'w').write(pdf)
+
+    pdf = pydf.generate_pdf('www.google.com')
+    open('google.pdf', 'w').write(pdf)
+
+## API
+
+#### generate_pdf(source, [**kwargs])
+
+Generate a PDF from either a URL or an HTML string.
+
+After the `source` argument, all other arguments are
+passed straight to wkhtmltopdf.
+
+For details on extra arguments see the output of get_help()
+and get_extended_help()
+
+All arguments whether specified or caught with extra_kwargs are converted
+to command line args with "'--' + original_name.replace('_', '-')"
+
+Arguments which are True are passed with no value, e.g. just `--quiet`; False
+and None arguments are omitted; everything else is passed as `str(value)`.
+
+**Arguments:**
+
+* source: html string to generate pdf from or url to get
+* quiet: bool
+* grayscale: bool
+* lowquality: bool
+* margin_bottom: string eg. 10mm
+* margin_left: string eg. 10mm
+* margin_right: string eg. 10mm
+* margin_top: string eg. 10mm
+* orientation: Portrait or Landscape
+* page_height: string eg. 10mm
+* page_width: string eg. 10mm
+* page_size: string: A4, Letter, etc.
+* image_dpi: int default 600
+* image_quality: int default 94
+* extra_kwargs: any exotic extra options for wkhtmltopdf
+
+Returns string representing pdf
+
+#### get_version()
+
+Get version of pydf and wkhtmltopdf binary
+
+#### get_help()
+
+get help string from wkhtmltopdf binary
+uses -h command line option
+
+#### get_extended_help()
+
+get extended help string from wkhtmltopdf binary
+uses -H command line option
+
+#### execute_wk(*args)
+
+Low-level function to call wkhtmltopdf: the arguments are appended to the wkhtmltopdf binary path and passed to subprocess with no processing.
diff --git a/bin/wkhtmltopdf b/bin/wkhtmltopdf
diff --git a/bin/wkhtmltopdf-heroku b/bin/wkhtmltopdf-heroku
diff --git a/pydf/__init__.py b/pydf/__init__.py
@@ -0,0 +1 @@
+from .wkhtmltopdf import *
diff --git a/pydf/wkhtmltopdf.py b/pydf/wkhtmltopdf.py
@@ -0,0 +1,140 @@
+import os
+import subprocess
+from tempfile import NamedTemporaryFile
+
+# Version of the pydf package; included in the string built by get_version().
+__version__ = '0.1'
+
+
+def execute_wk(*args):
+    """
+    Locate the wkhtmltopdf binary and run it with the given arguments.
+
+    The binary defaults to ``../bin/wkhtmltopdf`` relative to this module, or
+    ``../bin/wkhtmltopdf-heroku`` when the ``DYNO`` environment variable is
+    present. The ``WKHTMLTOPDF_CMD`` environment variable overrides either.
+
+    :param args: command line arguments appended after the binary path
+    :return: stdout, stderr
+    :raises IOError: if the binary path is not a file, or if wkhtmltopdf
+        exits with a non-zero return code
+    """
+    if 'DYNO' in os.environ:
+        binary_name = 'wkhtmltopdf-heroku'
+    else:
+        binary_name = 'wkhtmltopdf'
+    bundled_binary = os.path.join(os.path.dirname(__file__), '../bin/', binary_name)
+    binary_path = os.environ.get('WKHTMLTOPDF_CMD', bundled_binary)
+    if not os.path.isfile(binary_path):
+        raise IOError('wkhtmltopdf binary not found at %s' % binary_path)
+
+    process = subprocess.Popen(
+        (binary_path,) + args,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    output, errors = process.communicate()
+    if process.returncode:
+        raise IOError('error running wkhtmltopdf: "%s"' % errors)
+    return output, errors
+
+
+def generate_pdf(source,
+                 quiet=True,
+                 grayscale=False,
+                 lowquality=False,
+                 margin_bottom=None,
+                 margin_left=None,
+                 margin_right=None,
+                 margin_top=None,
+                 orientation=None,
+                 page_height=None,
+                 page_width=None,
+                 page_size=None,
+                 image_dpi=None,
+                 image_quality=None,
+                 **extra_kwargs):
+    """
+    Generate a pdf from either a url or a html string.
+
+    ``source`` is treated as a url when, ignoring surrounding whitespace, it
+    starts with 'http' or 'www'. Otherwise it is written to a temporary
+    .html file which wkhtmltopdf then renders. Text is encoded as utf-8;
+    byte strings are written unchanged (assumed to be already encoded).
+
+    All arguments after ``source`` are passed straight to wkhtmltopdf.
+
+    For details on extra arguments see the output of get_help()
+    and get_extended_help()
+
+    All arguments whether specified or caught with extra_kwargs are converted
+    to command line args with "'--' + original_name.replace('_', '-')"
+
+    Arguments which are True are passed with no value eg. just --quiet, False
+    and None arguments are omitted, everything else (including 0) is passed
+    with str(value).
+
+    :param source: html string to generate pdf from or url to get
+    :param quiet: bool
+    :param grayscale: bool
+    :param lowquality: bool
+    :param margin_bottom: string eg. 10mm
+    :param margin_left: string eg. 10mm
+    :param margin_right: string eg. 10mm
+    :param margin_top: string eg. 10mm
+    :param orientation: Portrait or Landscape
+    :param page_height: string eg. 10mm
+    :param page_width: string eg. 10mm
+    :param page_size: string: A4, Letter, etc.
+    :param image_dpi: int default 600
+    :param image_quality: int default 94
+    :param extra_kwargs: any exotic extra options for wkhtmltopdf
+    :return: content of the generated pdf as a byte string
+    :raises IOError: propagated from execute_wk if wkhtmltopdf is missing
+        or exits with an error
+    """
+    is_url = source.strip().startswith(('http', 'www'))
+
+    # Explicit (name, value) pairs instead of poking at locals(); the extra
+    # options are sorted so the generated command line is deterministic.
+    options = [
+        ('quiet', quiet),
+        ('grayscale', grayscale),
+        ('lowquality', lowquality),
+        ('margin_bottom', margin_bottom),
+        ('margin_left', margin_left),
+        ('margin_right', margin_right),
+        ('margin_top', margin_top),
+        ('orientation', orientation),
+        ('page_height', page_height),
+        ('page_width', page_width),
+        ('page_size', page_size),
+        ('image_dpi', image_dpi),
+        ('image_quality', image_quality),
+    ]
+    options.extend(sorted(extra_kwargs.items()))
+
+    cmd_args = []
+    for name, value in options:
+        # Identity checks rather than "value in [None, False]": 0 == False,
+        # so a membership test would silently drop options such as
+        # margin_top=0.
+        if value is None or value is False:
+            continue
+        arg_name = '--' + name.replace('_', '-')
+        if value is True:
+            cmd_args.append(arg_name)
+        else:
+            cmd_args.extend([arg_name, str(value)])
+
+    def render(src):
+        # 'w+b' is the valid binary read/write mode; the pdf is read back
+        # as bytes once wkhtmltopdf has written to the file by name.
+        with NamedTemporaryFile(suffix='.pdf', mode='w+b') as pdf_file:
+            # Build a fresh argument list so cmd_args is never mutated.
+            execute_wk(*(cmd_args + [src, pdf_file.name]))
+            pdf_file.seek(0)
+            return pdf_file.read()
+
+    if is_url:
+        return render(source)
+
+    # Only encode text: calling .encode() on an already-encoded byte string
+    # triggers an implicit ascii decode on Python 2 and fails for non-ascii.
+    html = source if isinstance(source, bytes) else source.encode('utf-8')
+    with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
+        html_file.write(html)
+        # Flush so wkhtmltopdf, which opens the file by name, sees the data.
+        html_file.flush()
+        return render(html_file.name)
+
+
+def get_version():
+    """
+    Report the pydf version together with the wkhtmltopdf binary's version.
+
+    The wkhtmltopdf part is the stdout of running the binary with -V.
+
+    :return: version string
+    """
+    pydf_line = 'pydf version: %s\n' % __version__
+    wk_stdout, _ = execute_wk('-V')
+    return pydf_line + 'wkhtmltopdf version: %s' % wk_stdout
+
+
+def get_help():
+    """
+    Return what the wkhtmltopdf binary prints to stdout
+    when run with the -h command line option.
+
+    :return: help string
+    """
+    stdout, _ = execute_wk('-h')
+    return stdout
+
+
+def get_extended_help():
+    """
+    Return what the wkhtmltopdf binary prints to stdout
+    when run with the -H command line option.
+
+    :return: extended help string
+    """
+    stdout, _ = execute_wk('-H')
+    return stdout
diff --git a/setup.py b/setup.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+
+import os, re
+
+from setuptools import setup
+
+# Short summary passed to setup() as the package description.
+description = 'PDF generation in python using wkhtmltopdf suitable for heroku'
+
+setup(
+    name='pydf',
+    version='0.1',
+    description=description,
+    author='Samuel Colvin',
+    license='MIT',
+    author_email='S@muelColvin.com',
+    url='https://github.com/samuelcolvin/pydf',
+    packages=['pydf'],
+    platforms='any',
+    # The two bundled wkhtmltopdf binaries are installed as scripts.
+    scripts=['bin/wkhtmltopdf', 'bin/wkhtmltopdf-heroku'],
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Web Environment',
+        'Intended Audience :: Developers',
+        # Must agree with the ``license`` argument above, which declares MIT.
+        'License :: OSI Approved :: MIT License',
+        'Operating System :: OS Independent',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Internet :: WWW/HTTP',
+        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    test_suite='tests',
+    zip_safe=False,
+)
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1 @@
+from .runtests import *
diff --git a/tests/runtests.py b/tests/runtests.py
@@ -0,0 +1,79 @@
+# coding: utf8
+import os
+from unittest import TestCase
+from pydf import generate_pdf, get_version, get_help, get_extended_help
+
+
+class PywkherTestCase(TestCase):
+    """
+    Tests for pydf's public functions.
+
+    These tests call generate_pdf / get_help / get_version for real, so they
+    need a working wkhtmltopdf binary; test_generate_pdf_with_url renders
+    http://google.com and so presumably needs network access as well.
+    """
+
+    def _assert_pdf(self, content):
+        """Fail unless content starts with the '%PDF' file signature."""
+        # unittest assertions (unlike bare assert) still run under python -O.
+        self.assertEqual(content[:4], b'%PDF')
+
+    def test_generate_pdf_with_html(self):
+        pdf_content = generate_pdf('<html><body>Is this thing on?</body></html>')
+        self._assert_pdf(pdf_content)
+
+    def test_generate_pdf_with_url(self):
+        pdf_content = generate_pdf('http://google.com')
+        self._assert_pdf(pdf_content)
+
+    def test_unicode(self):
+        pdf_content = generate_pdf(u'<html><body>Schrödinger</body></html>')
+        self._assert_pdf(pdf_content)
+
+    def test_extra_arguments(self):
+        pdf_content = generate_pdf(
+            '<html><body>testing</body></html>',
+            quiet=False,
+            grayscale=True,
+            lowquality=True,
+            margin_bottom='20mm',
+            margin_left='20mm',
+            margin_right='20mm',
+            margin_top='20mm',
+            orientation='Landscape',
+            page_height=None,
+            page_width=None,
+            page_size='Letter',
+            image_dpi='300',
+            image_quality='70',
+        )
+        self._assert_pdf(pdf_content)
+
+    def test_custom_size(self):
+        pdf_content = generate_pdf(
+            '<html><body>testing</body></html>',
+            page_height='50mm',
+            page_width='50mm',
+        )
+        self._assert_pdf(pdf_content)
+
+    def test_extra_kwargs(self):
+        pdf_content = generate_pdf(
+            '<html><body>testing</body></html>',
+            header_right='Page [page] of [toPage]'
+        )
+        self._assert_pdf(pdf_content)
+
+    def test_wrong_path(self):
+        previous = os.environ.get('WKHTMLTOPDF_CMD')
+        os.environ['WKHTMLTOPDF_CMD'] = 'foo bar'
+        try:
+            self.assertRaises(IOError, get_help)
+        finally:
+            # Always undo the override, even when the assertion fails, so a
+            # bogus binary path cannot leak into the other tests.
+            if previous is None:
+                del os.environ['WKHTMLTOPDF_CMD']
+            else:
+                os.environ['WKHTMLTOPDF_CMD'] = previous
+
+    def test_no_arguments(self):
+        # source is a required positional argument.
+        self.assertRaises(TypeError, generate_pdf)
+
+    def test_get_version(self):
+        version = get_version()
+        # Parenthesised form prints identically on Python 2 and also parses
+        # on Python 3.
+        print(version)
+        self.assertTrue(version.startswith('pydf version: '))
+        self.assertTrue('wkhtmltopdf version: ' in version)
+
+    def test_get_help(self):
+        get_help()
+
+    def test_get_extended_help(self):
+        get_extended_help()