Skip to content

Commit 86050e8

Browse files
authored
Merge pull request #1067 from juhasch/feature/pre_embedimages
Preprocessor to embed markdown images
2 parents 70e0be1 + ed3e91e commit 86050e8

File tree

5 files changed

+237
-0
lines changed

5 files changed

+237
-0
lines changed

docs/source/exporting.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ Converting linked SVG to PDF
5151
.. autoclass:: SVG2PDFPreprocessor
5252

5353

54+
Embedding images in notebooks
55+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
56+
57+
.. autoclass:: EmbedImagesPreprocessor
58+
5459

5560
Postprocessors
5661
--------------

src/jupyter_contrib_nbextensions/nbconvert_support/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from .pp_highlighter import HighlighterPostProcessor, HighlighterPreprocessor
1111
from .pre_codefolding import CodeFoldingPreprocessor
1212
from .pre_pymarkdown import PyMarkdownPreprocessor
13+
from .pre_embedimages import EmbedImagesPreprocessor
1314
from .pre_svg2pdf import SVG2PDFPreprocessor
1415
from .toc2 import TocExporter
1516

@@ -22,6 +23,7 @@
2223
'ExporterInliner',
2324
'HighlighterPostProcessor',
2425
'HighlighterPreprocessor',
26+
'EmbedImagesPreprocessor',
2527
'NotebookLangExporter',
2628
'PyMarkdownPreprocessor',
2729
'SVG2PDFPreprocessor',
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""Nbconvert preprocessor for the python-markdown nbextension."""
2+
3+
import base64
4+
import os
5+
import re
6+
7+
from ipython_genutils.ipstruct import Struct
8+
from nbconvert.preprocessors import Preprocessor
9+
from traitlets import Bool, Unicode
10+
11+
try:
12+
from urllib.request import urlopen # py3
13+
except ImportError:
14+
from urllib2 import urlopen
15+
16+
17+
class EmbedImagesPreprocessor(Preprocessor):
18+
"""
19+
:mod:`nbconvert` Preprocessor to embed images in a markdown cell as
20+
attachment inside the notebook itself.
21+
22+
This :class:`~nbconvert.preprocessors.Preprocessor` replaces references to
23+
image files in markdown cells with attachments stored in the cell itself.
24+
25+
The preprocessor is installed by default. To enable embedding images with
26+
NbConvert, you need to set the configuration parameter
27+
`EmbedImagesPreprocessor.embed_images=True`.
28+
This can be done either in the `jupyter_nbconvert_config.py` file::
29+
30+
c.EmbedImagesPreprocessor.embed_images=True
31+
32+
or using a command line parameter when calling NbConvert::
33+
34+
$ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True mynotebook.ipynb
35+
36+
Further options are::
37+
38+
EmbedImagesPreprocessor.embed_remote_images=True
39+
40+
to additionally embed all images referenced by a URL
41+
(e.g. http://jupyter.org/assets/nav_logo.svg) instead of a local file name.
42+
43+
Another configuration option is::
44+
45+
EmbedImagesPreprocessor.resize=small
46+
47+
Lets you scale down the size of an image. This is useful if you want to
48+
save space by not embedding large images and instead use a smaller (scaled)
49+
version. Works only for raster images (i.e. png, jpg).
50+
Valid resize settings are: small = 500px, mid = 1000px, large = 2000px
51+
for maximum size in length or width. No upscaling of small images will
52+
be performed. The Python package `PIL` needs to be installed for this
53+
option to work.
54+
55+
Example::
56+
57+
$ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True
58+
--EmbedImagesPreprocessor.resize=large mynotebook.ipynb
59+
60+
*Note:* To embed images after conversion to HTML you can also use the
61+
`html_embed` exporter
62+
"""
63+
64+
embed_images = Bool(False, help="Embed images as attachment").tag(config=True)
65+
embed_remote_images = Bool(False, help="Embed images referenced by an url as attachment").tag(config=True)
66+
resize = Unicode('', help="Resize images to save space (reduce size)").tag(config=True)
67+
imgsizes = {'small': 500, 'mid': 1000, 'large': 2000}
68+
69+
def preprocess(self, nb, resources):
    """Run the preprocessor only when ``embed_images`` is switched on.

    Parameters
    ----------
    nb : NotebookNode
        Notebook being converted
    resources : dictionary
        Additional resources used in the conversion process

    Returns
    -------
    tuple
        ``(nb, resources)``; both are handed back untouched when the
        preprocessor is disabled.
    """
    if not self.embed_images:
        # Disabled: leave the notebook exactly as it came in
        return nb, resources

    processed_nb, processed_resources = super(
        EmbedImagesPreprocessor, self).preprocess(nb, resources)
    return processed_nb, processed_resources
74+
75+
def resize_image(self, imgname, imgformat, imgdata):
    """Downscale a raster image to the configured maximum size.

    The image is only ever made smaller: if its longest side already fits
    into the limit selected by ``self.resize`` the data is returned as is.
    Resizing needs the Pillow (PIL) package; without it the original data
    is returned and an info message is logged.

    Parameters
    ----------
    imgname: str
        Name of image (only used in log messages)
    imgformat: str
        Format of image as given by its file extension (png, jpg or jpeg)
    imgdata: bytes
        Binary image data

    Returns
    -------
    bytes
        Re-encoded, downscaled image data, or `imgdata` unchanged if the
        format is not a supported raster format, ``self.resize`` is not a
        key of ``self.imgsizes``, Pillow is missing, or the image is
        already small enough.
    """
    max_size = self.imgsizes.get(self.resize)
    if max_size is None or imgformat not in ('png', 'jpg', 'jpeg'):
        # Nothing to do: unknown size setting or not a raster image
        return imgdata

    from io import BytesIO
    try:
        from PIL import Image
    except ImportError:
        self.log.info("Pillow library not available to resize images")
        return imgdata

    im = Image.open(BytesIO(imgdata))
    width, height = im.size
    # float() forces true division on Python 2 as well; integer division
    # would turn every downscale factor into 0.
    factor = float(max_size) / max(width, height)
    # Only make images smaller when rescaling
    if factor < 1.0:
        # Keep at least one pixel per side for very elongated images
        newsize = (max(1, int(width * factor)),
                   max(1, int(height * factor)))
        newim = im.resize(newsize)
        fp = BytesIO()
        # PIL requires JPEG instead of JPG
        newim.save(fp, format=imgformat.replace('jpg', 'jpeg'))
        imgdata = fp.getvalue()
        fp.close()
        self.log.debug("Resized %d x %d image %s to size %d x %d pixels",
                       width, height, imgname, newsize[0], newsize[1])
    return imgdata
109+
110+
def replfunc_md(self, match):
    """Read image and store as base64 encoded attachment.

    Used as replacement callback for the markdown image regex. The image
    data is stored in ``self.attachments`` under the original url and the
    markdown image is rewritten to reference that attachment.

    Parameters
    ----------
    match : re.Match
        Match of a markdown image; group 1 is the alt text, group 2 the
        image url or file name.

    Returns
    -------
    str
        ``![alt](attachment:url)`` if the image was embedded, otherwise the
        unmodified matched text (images that already are attachments,
        ``data:`` URIs, and remote images while ``embed_remote_images`` is
        off).

    Raises
    ------
    IOError
        If a local image file cannot be opened or read.
    """
    from contextlib import closing

    alt_text, url = match.group(1), match.group(2)
    scheme_probe = url.lower()

    # Match the full scheme prefix so local files such as
    # "attachments/pic.png" or "http_flow.png" are still embedded.
    if scheme_probe.startswith(('attachment:', 'data:')):
        return match.group(0)

    is_remote = scheme_probe.startswith(('http://', 'https://'))
    if is_remote:
        if not self.embed_remote_images:
            return match.group(0)
        # closing() releases the connection and also works with urllib2
        with closing(urlopen(url)) as response:
            data = response.read()
        # Ignore query string / fragment when looking for the extension
        name = url.split('#', 1)[0].split('?', 1)[0]
    else:
        filename = os.path.join(self.path, url)
        with open(filename, 'rb') as f:
            data = f.read()
        name = url

    imgformat = name.split('.')[-1].lower()

    if self.resize in self.imgsizes:
        data = self.resize_image(url, imgformat, data)

    # File extensions are not always valid MIME subtypes
    subtype = {'jpg': 'jpeg', 'jpe': 'jpeg', 'svg': 'svg+xml'}.get(
        imgformat, imgformat)

    self.log.debug("Embedding url: %s, format: %s", url, imgformat)
    b64_data = base64.b64encode(data).decode("utf-8")
    self.attachments[url] = {'image/' + subtype: b64_data}

    return '![' + alt_text + '](attachment:' + url + ')'
135+
136+
def preprocess_cell(self, cell, resources, index):
    """
    Embed the images referenced by a markdown cell as attachments.

    Every markdown image (``![alt](url)``) in the cell source is passed to
    :meth:`replfunc_md`. Cells of any other type are returned unchanged.

    Parameters
    ----------
    cell : NotebookNode cell
        Notebook cell being processed
    resources : dictionary
        Additional resources used in the conversion process.  Allows
        preprocessors to pass variables into the Jinja engine.
        ``resources['metadata']['path']`` is used as base directory for
        local image files; if it is missing, file names are used as given.
    index : int
        Index of the cell being processed (see base.py)

    Returns
    -------
    tuple
        ``(cell, resources)``
    """
    self.path = resources.get('metadata', {}).get('path', '')

    if cell.cell_type != "markdown":
        return cell, resources

    attachments = getattr(cell, 'attachments', None)
    if attachments is None:
        attachments = Struct()
    self.attachments = attachments

    # Alt text must not contain "]" and the url must not contain ")":
    # otherwise several images in one cell collapse into a single greedy
    # match and only the last one gets embedded.
    regex = re.compile(r'!\[([^\]"]*)\]\(([^)"]+)\)')
    cell.source = regex.sub(self.replfunc_md, cell.source)

    # Do not add an empty attachments entry to cells without images
    if self.attachments:
        cell.attachments = self.attachments
    return cell, resources

tests/data/large_image.png

88.1 KB
Loading

tests/test_preprocessors.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,78 @@ def test_preprocessor_svg2pdf():
113113
'exported pdf should be referenced in exported notebook')
114114

115115

116+
def test_preprocessor_embedimages():
    """Check that an embedded local PNG shows up as ``image/png`` attachment."""
    # check import shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501

    code_cell = nbf.new_code_cell(source="a = 'world'")
    image_markdown = "![testimage]({})".format(path_in_data('icon.png'))
    markdown_cell = nbf.new_markdown_cell(source=image_markdown)
    notebook_node = nbf.new_notebook(cells=[code_cell, markdown_cell])

    customconfig = Config(EmbedImagesPreprocessor={'embed_images': True})
    body, resources = export_through_preprocessor(
        notebook_node, EmbedImagesPreprocessor, NotebookExporter, 'ipynb',
        customconfig)

    # The exported notebook must carry the image under its mime type
    expected = 'image/png'
    assert_in(expected, body, 'Attachment {} is missing'.format(expected))
133+
134+
135+
def test_preprocessor_embedimages_resize():
    """Check that the ``resize`` option orders exported notebook sizes.

    The same notebook is exported without embedding, with each of the
    resize settings, and with embedding but no resizing; the length of the
    exported body has to grow strictly in that order.
    """
    # check import shortcut
    from jupyter_contrib_nbextensions.nbconvert_support import EmbedImagesPreprocessor  # noqa E501

    try:
        from PIL import Image  # noqa F401
    except ImportError:
        raise SkipTest('PIL not found')

    code_cell = nbf.new_code_cell(source="a = 'world'")
    image_markdown = "![testimage]({})".format(
        path_in_data('large_image.png'))
    notebook_node = nbf.new_notebook(cells=[
        code_cell,
        nbf.new_markdown_cell(source=image_markdown),
    ])

    def exported_length(**options):
        """Export the notebook and return the length of the result."""
        if not options:
            # No configuration at all: preprocessor stays disabled
            body, resources = export_through_preprocessor(
                notebook_node, EmbedImagesPreprocessor, NotebookExporter,
                'ipynb')
        else:
            customconfig = Config(EmbedImagesPreprocessor=options)
            body, resources = export_through_preprocessor(
                notebook_node, EmbedImagesPreprocessor, NotebookExporter,
                'ipynb', customconfig)
        return len(body)

    len_noembed = exported_length()
    len_small = exported_length(embed_images=True, resize='small')
    len_mid = exported_length(embed_images=True, resize='mid')
    len_large = exported_length(embed_images=True, resize='large')
    len_noresize = exported_length(embed_images=True)

    assert len_noembed < len_small
    assert len_small < len_mid
    assert len_mid < len_large
    assert len_large < len_noresize
186+
187+
116188
def _normalize_iso8601_timezone(timestamp_str):
117189
# Zulu -> +00:00 offset
118190
timestamp_str = re.sub(r'Z$', r'+00:00', timestamp_str)
@@ -146,3 +218,4 @@ def test_preprocessor_execute_time():
146218
_normalize_iso8601_timezone(etmd['end_time']),
147219
_normalize_iso8601_timezone(etmd['start_time']),
148220
'end_time should not be before start_time')
221+

0 commit comments

Comments
 (0)