Skip to content

Commit 4d4629e

Browse files
committed
removing url pdf gen
1 parent 68ffb4c commit 4d4629e

File tree

3 files changed

+21
-17
lines changed

3 files changed

+21
-17
lines changed

pydf/wkhtmltopdf.py

Lines changed: 7 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,8 @@ def generate_pdf(source, *,
7676
:param extra_kwargs: any exotic extra options for wkhtmltopdf
7777
:return: string representing pdf
7878
"""
79-
is_url = source.lstrip().startswith(('http', 'www'))
79+
if source.lstrip().startswith(('http', 'www')):
80+
raise RuntimeError('pdf generation from urls is not supported')
8081

8182
py_args = dict(
8283
cache_dir=cache_dir,
@@ -114,9 +115,12 @@ def generate_pdf(source, *,
114115
]
115116
metadata = '\n'.join(f'/{name} ({value})' for name, value in fields if value)
116117

117-
def gen_pdf(src, cmd_args):
118+
with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
119+
html_file.write(source.encode())
120+
html_file.flush()
121+
html_file.seek(0)
118122
with NamedTemporaryFile(suffix='.pdf', mode='rb') as pdf_file:
119-
cmd_args += [src, pdf_file.name]
123+
cmd_args += [html_file.name, pdf_file.name]
120124
_, stderr, returncode = execute_wk(*cmd_args)
121125
pdf_file.seek(0)
122126
pdf_bytes = pdf_file.read()
@@ -130,15 +134,6 @@ def gen_pdf(src, cmd_args):
130134
pdf_bytes = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_bytes, count=1)
131135
return pdf_bytes
132136

133-
if is_url:
134-
return gen_pdf(source, cmd_args)
135-
136-
with NamedTemporaryFile(suffix='.html', mode='wb') as html_file:
137-
html_file.write(source.encode())
138-
html_file.flush()
139-
html_file.seek(0)
140-
return gen_pdf(html_file.name, cmd_args)
141-
142137

143138
def _string_execute(*args):
144139
return execute_wk(*args)[0].decode().strip(' \n')

tests/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
coverage==4.4
22
docutils==0.13.1
33
flake8==3.3.0
4+
pdfminer.six==20170419
45
pycodestyle==2.3.1
56
pyflakes==1.5.0
67
pytest==3.0.7

tests/test_main.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,24 @@
1+
from io import BytesIO, StringIO
2+
13
import pytest
4+
import pdfminer.layout
5+
from pdfminer import high_level
26

37
from pydf import generate_pdf, get_extended_help, get_help, get_version
48

59

10+
def get_pdf_text(pdf_data: bytes) -> str:
11+
laparams = pdfminer.layout.LAParams()
12+
output = StringIO()
13+
high_level.extract_text_to_fp(BytesIO(pdf_data), output, laparams=laparams)
14+
return output.getvalue()
15+
16+
617
def test_generate_pdf_with_html():
718
pdf_content = generate_pdf('<html><body>Is this thing on?</body></html>')
819
assert pdf_content[:4] == b'%PDF'
20+
text = get_pdf_text(pdf_content)
21+
assert 'Is this thing on?\n\n\x0c' == text
922

1023

1124
def test_generate_pdf_with_html_meta_data():
@@ -27,11 +40,6 @@ def test_generate_pdf_with_html_meta_data():
2740
/Creator (this is the creator)""" in beginning
2841

2942

30-
def test_generate_pdf_with_url():
31-
pdf_content = generate_pdf('http://google.com')
32-
assert pdf_content[:4] == b'%PDF'
33-
34-
3543
def test_unicode():
3644
pdf_content = generate_pdf(u'<html><body>Schrödinger</body></html>')
3745
assert pdf_content[:4] == b'%PDF'

0 commit comments

Comments
 (0)