Skip to content

Commit 4c4c9cc

Browse files
committed
initial version
1 parent 674ca8a commit 4c4c9cc

File tree

8 files changed

+328
-3
lines changed

8 files changed

+328
-3
lines changed

README.md

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,73 @@
1-
py-wk-pdf
2-
=========
1+
pydf
2+
====
33

4-
PDF generation in python using wkhtmltopdf suitable for heroku
4+
PDF generation in python using [wkhtmltopdf](http://wkhtmltopdf.org/) suitable for Heroku.
5+
6+
Based on [pywkher](https://github.com/jwmayfield/pywkher) but significantly extended.
7+
8+
The repo is pretty large (currently 116 MB) because it contains two binary versions of wkhtmltopdf, one for use on Heroku (Ubuntu 10.04) and one for use on Ubuntu 14.04. See [this](https://github.com/wkhtmltopdf/wkhtmltopdf/issues/1817) discussion for an explanation.
9+
10+
## Basic Usage
11+
12+
import pydf
13+
pdf = pydf.generate_pdf('<h1>this is html</h1>')
14+
open('test_doc.pdf', 'w').write(pdf)
15+
16+
pdf = pydf.generate_pdf('www.google.com')
17+
open('google.pdf', 'w').write(pdf)
18+
19+
## API
20+
21+
#### generate_pdf(source, [**kwargs])
22+
23+
Generate a pdf from either a url or a html string.
24+
25+
After the source argument all other arguments are
26+
passed straight to wkhtmltopdf
27+
28+
For details on extra arguments see the output of get_help()
29+
and get_extended_help()
30+
31+
All arguments whether specified or caught with extra_kwargs are converted
32+
to command line args with "'--' + original_name.replace('_', '-')"
33+
34+
Arguments which are True are passed with no value eg. just --quiet, False
35+
and None arguments are missed, everything else is passed with str(value).
36+
37+
**Arguments:**
38+
39+
* source: html string to generate pdf from or url to get
40+
* quiet: bool
41+
* grayscale: bool
42+
* lowquality: bool
43+
* margin_bottom: string eg. 10mm
44+
* margin_left: string eg. 10mm
45+
* margin_right: string eg. 10mm
46+
* margin_top: string eg. 10mm
47+
* orientation: Portrait or Landscape
48+
* page_height: string eg. 10mm
49+
* page_width: string eg. 10mm
50+
* page_size: string: A4, Letter, etc.
51+
* image_dpi: int default 600
52+
* image_quality: int default 94
53+
* extra_kwargs: any exotic extra options for wkhtmltopdf
54+
55+
Returns string representing pdf
56+
57+
#### get_version()
58+
59+
Get version of pydf and wkhtmltopdf binary
60+
61+
#### get_help()
62+
63+
get help string from wkhtmltopdf binary
64+
uses -h command line option
65+
66+
#### get_extended_help()
67+
68+
get extended help string from wkhtmltopdf binary
69+
uses -H command line option
70+
71+
#### execute_wk(*args)
72+
73+
Low-level function to call wkhtmltopdf: the arguments are appended to the wkhtmltopdf binary path and passed to subprocess with no processing.

bin/wkhtmltopdf

36.6 MB
Binary file not shown.

bin/wkhtmltopdf-heroku

44.5 MB
Binary file not shown.

pydf/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .wkhtmltopdf import *

pydf/wkhtmltopdf.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import os
2+
import subprocess
3+
from tempfile import NamedTemporaryFile
4+
5+
__version__ = '0.1'
6+
7+
8+
def execute_wk(*args):
    """
    Locate the wkhtmltopdf binary and run it with the given arguments.

    The binary path is taken from the ``WKHTMLTOPDF_CMD`` environment
    variable when it is set; otherwise it defaults to ``../bin/wkhtmltopdf``
    relative to this module (``../bin/wkhtmltopdf-heroku`` when the ``DYNO``
    environment variable is present).

    :param args: command line arguments, appended after the binary path and
        passed straight to subprocess.Popen
    :return: stdout, stderr as captured from the process
    :raises IOError: if the binary path is not an existing file, or if
        wkhtmltopdf exits with a non-zero return code
    """
    this_dir = os.path.dirname(__file__)
    on_heroku = 'DYNO' in os.environ
    wk_name = 'wkhtmltopdf-heroku' if on_heroku else 'wkhtmltopdf'
    wkhtmltopdf_default = os.path.join(this_dir, '../bin/', wk_name)
    # WKHTMLTOPDF_CMD overrides the bundled binary
    wkhtmltopdf_cmd = os.environ.get('WKHTMLTOPDF_CMD', wkhtmltopdf_default)
    if not os.path.isfile(wkhtmltopdf_cmd):
        raise IOError('wkhtmltopdf binary not found at %s' % wkhtmltopdf_cmd)
    wk_args = (wkhtmltopdf_cmd,) + args
    p = subprocess.Popen(wk_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        message = stderr
        # The pipes yield bytes on Python 3; decode so the error message shows
        # the text itself rather than a b'...' repr. On Python 2 stderr is
        # already a str and is left untouched.
        if not isinstance(message, str):
            message = message.decode('utf-8', 'replace')
        raise IOError('error running wkhtmltopdf: "%s"' % message)
    return stdout, stderr
29+
30+
31+
def generate_pdf(source,
                 quiet=True,
                 grayscale=False,
                 lowquality=False,
                 margin_bottom=None,
                 margin_left=None,
                 margin_right=None,
                 margin_top=None,
                 orientation=None,
                 page_height=None,
                 page_width=None,
                 page_size=None,
                 image_dpi=None,
                 image_quality=None,
                 **extra_kwargs):
    """
    Generate a pdf from either a url or a html string.

    ``source`` is treated as a url when, ignoring surrounding whitespace, it
    starts with "http" or "www"; otherwise it is written to a temporary
    .html file which is handed to wkhtmltopdf.

    After the source argument all other arguments are
    passed straight to wkhtmltopdf

    For details on extra arguments see the output of get_help()
    and get_extended_help()

    All arguments whether specified or caught with extra_kwargs are converted
    to command line args with "'--' + original_name.replace('_', '-')"

    Arguments which are True are passed with no value eg. just --quiet, False
    and None arguments are omitted, everything else (including 0) is passed
    with str(value).

    :param source: html string to generate pdf from or url to get
    :param quiet: bool
    :param grayscale: bool
    :param lowquality: bool
    :param margin_bottom: string eg. 10mm
    :param margin_left: string eg. 10mm
    :param margin_right: string eg. 10mm
    :param margin_top: string eg. 10mm
    :param orientation: Portrait or Landscape
    :param page_height: string eg. 10mm
    :param page_width: string eg. 10mm
    :param page_size: string: A4, Letter, etc.
    :param image_dpi: int default 600
    :param image_quality: int default 94
    :param extra_kwargs: any exotic extra options for wkhtmltopdf
    :return: content of the generated pdf file, read in binary mode
    :raises IOError: propagated from execute_wk when wkhtmltopdf is missing
        or exits with a non-zero return code
    """
    is_url = source.strip().startswith(('http', 'www'))

    py_args = {
        'quiet': quiet,
        'grayscale': grayscale,
        'lowquality': lowquality,
        'margin_bottom': margin_bottom,
        'margin_left': margin_left,
        'margin_right': margin_right,
        'margin_top': margin_top,
        'orientation': orientation,
        'page_height': page_height,
        'page_width': page_width,
        'page_size': page_size,
        'image_dpi': image_dpi,
        'image_quality': image_quality,
    }
    py_args.update(extra_kwargs)

    cmd_args = []
    for name, value in py_args.items():
        # Identity checks on purpose: "value in [None, False]" also matches 0
        # (0 == False), which silently dropped options such as margin_top=0.
        if value is None or value is False:
            continue
        arg_name = '--' + name.replace('_', '-')
        if value is True:
            cmd_args.append(arg_name)
        else:
            cmd_args.extend([arg_name, str(value)])

    def gen(src):
        # wkhtmltopdf writes to the temp file by name; the content is then
        # read back through the handle that is already open in binary mode.
        with NamedTemporaryFile(suffix='.pdf', mode='w+b') as pdf_file:
            execute_wk(*(cmd_args + [src, pdf_file.name]))
            pdf_file.seek(0)
            return pdf_file.read()

    if is_url:
        return gen(source)

    # Encode text once at the I/O boundary; a byte string (the native str on
    # Python 2) is written unchanged instead of being implicitly decoded.
    html = source if isinstance(source, bytes) else source.encode('utf-8')
    with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
        html_file.write(html)
        # make sure the content is on disk before wkhtmltopdf opens the file
        html_file.flush()
        return gen(html_file.name)
110+
111+
112+
def get_version():
    """
    Get version of pydf and wkhtmltopdf binary

    The wkhtmltopdf part is the stdout of running the binary with -V.

    :return: version string
    """
    wk_version = execute_wk('-V')[0]
    # Captured stdout is bytes on Python 3; decode it so the result does not
    # contain a b'...' repr. On Python 2 it is already a str.
    if not isinstance(wk_version, str):
        wk_version = wk_version.decode('utf-8', 'replace')
    v = 'pydf version: %s\n' % __version__
    v += 'wkhtmltopdf version: %s' % wk_version
    return v
121+
122+
123+
def get_help():
    """
    get help string from wkhtmltopdf binary
    uses -h command line option

    :return: help string
    """
    help_text = execute_wk('-h')[0]
    # Captured stdout is bytes on Python 3; decode so a string is returned as
    # documented. On Python 2 it is already a str.
    if not isinstance(help_text, str):
        help_text = help_text.decode('utf-8', 'replace')
    return help_text
131+
132+
133+
def get_extended_help():
    """
    get extended help string from wkhtmltopdf binary
    uses -H command line option

    :return: extended help string
    """
    help_text = execute_wk('-H')[0]
    # Captured stdout is bytes on Python 3; decode so a string is returned as
    # documented. On Python 2 it is already a str.
    if not isinstance(help_text, str):
        help_text = help_text.decode('utf-8', 'replace')
    return help_text

setup.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/python
# Packaging script for pydf.

import os
import re

from setuptools import setup

description = 'PDF generation in python using wkhtmltopdf suitable for heroku'

setup(
    name='pydf',
    version='0.1',
    description=description,
    author='Samuel Colvin',
    license='MIT',
    author_email='[email protected]',
    url='https://github.com/samuelcolvin/pydf',
    packages=['pydf'],
    platforms='any',
    # NOTE(review): pydf.wkhtmltopdf.execute_wk looks for these binaries under
    # ../bin/ relative to the package directory - confirm an installed copy
    # can still locate them (WKHTMLTOPDF_CMD can be used to override the path).
    scripts=['bin/wkhtmltopdf', 'bin/wkhtmltopdf-heroku'],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        # kept in step with the license= field above (was "BSD License")
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Topic :: Internet :: WWW/HTTP',
        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
    test_suite='tests',
    zip_safe=False
)

tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .runtests import *

tests/runtests.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# coding: utf8
2+
import os
3+
from unittest import TestCase
4+
from pydf import generate_pdf, get_version, get_help, get_extended_help
5+
6+
7+
class PywkherTestCase(TestCase):
    """
    Tests for the public pydf functions.

    These run the real wkhtmltopdf binary; test_generate_pdf_with_url also
    fetches http://google.com.
    """

    def _assert_is_pdf(self, pdf_content):
        # The bytes literal equals a native str on Python 2 and matches the
        # binary file content on Python 3.
        self.assertEqual(pdf_content[:4], b'%PDF')

    def test_generate_pdf_with_html(self):
        pdf_content = generate_pdf('<html><body>Is this thing on?</body></html>')
        self._assert_is_pdf(pdf_content)

    def test_generate_pdf_with_url(self):
        pdf_content = generate_pdf('http://google.com')
        self._assert_is_pdf(pdf_content)

    def test_unicode(self):
        pdf_content = generate_pdf(u'<html><body>Schrödinger</body></html>')
        self._assert_is_pdf(pdf_content)

    def test_extra_arguments(self):
        pdf_content = generate_pdf(
            '<html><body>testing</body></html>',
            quiet=False,
            grayscale=True,
            lowquality=True,
            margin_bottom='20mm',
            margin_left='20mm',
            margin_right='20mm',
            margin_top='20mm',
            orientation='Landscape',
            page_height=None,
            page_width=None,
            page_size='Letter',
            image_dpi='300',
            image_quality='70',
        )
        self._assert_is_pdf(pdf_content)

    def test_custom_size(self):
        pdf_content = generate_pdf(
            '<html><body>testing</body></html>',
            page_height='50mm',
            page_width='50mm',
        )
        self._assert_is_pdf(pdf_content)

    def test_extra_kwargs(self):
        pdf_content = generate_pdf(
            '<html><body>testing</body></html>',
            header_right='Page [page] of [toPage]'
        )
        self._assert_is_pdf(pdf_content)

    def test_wrong_path(self):
        previous = os.environ.get('WKHTMLTOPDF_CMD')
        os.environ['WKHTMLTOPDF_CMD'] = 'foo bar'
        try:
            self.assertRaises(IOError, get_help)
        finally:
            # Always restore the environment, even when the assertion fails,
            # so the bogus path cannot leak into the other tests.
            if previous is None:
                del os.environ['WKHTMLTOPDF_CMD']
            else:
                os.environ['WKHTMLTOPDF_CMD'] = previous

    def test_no_arguments(self):
        self.assertRaises(TypeError, generate_pdf)

    def test_get_version(self):
        version = get_version()
        self.assertTrue('pydf version' in version)
        self.assertTrue('wkhtmltopdf version' in version)

    def test_get_help(self):
        get_help()

    def test_get_extended_help(self):
        get_extended_help()

0 commit comments

Comments
 (0)