Skip to content

Commit 7e7c4c0

Browse files
author
Jürgen Hasch
committed
Preprocessor to embed markdown images
1 parent 9c31bf3 commit 7e7c4c0

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""Nbconvert preprocessor to embed markdown images as notebook attachments."""
2+
3+
from nbconvert.preprocessors import Preprocessor
4+
from traitlets import Bool, Float
5+
import re
6+
import os
7+
import base64
8+
from urllib.request import urlopen
9+
from io import StringIO
10+
from ipython_genutils.ipstruct import Struct
11+
12+
13+
class EmbedImagesPreprocessor(Preprocessor):
    """
    :mod:`nbconvert` Preprocessor to embed images in a markdown cell as
    attachment inside the notebook itself.

    This :class:`~nbconvert.preprocessors.Preprocessor` rewrites markdown
    image references ``![alt](target)`` into ``![alt](attachment:target)``
    and stores the base64 encoded image data in the attachments of the cell.

    The preprocessor is installed by default. To enable embedding images with
    NbConvert, you need to set the configuration parameter
    `EmbedImagesPreprocessor.embed_images=True`.
    This can be done either in the `jupyter_nbconvert_config.py` file::

        c.EmbedImagesPreprocessor.embed_images=True

    or using a command line parameter when calling NbConvert::

        $ jupyter nbconvert --to html --EmbedImagesPreprocessor.embed_images=True mynotebook.ipynb

    Further options are

        EmbedImagesPreprocessor.embed_remote_images=True

    to additionally embed all images referenced by an url
    (e.g. http://jupyter.org/assets/nav_logo.svg) instead of a local file
    name. Also

        EmbedImagesPreprocessor.dpi_scaling

    is meant to let you scale the size of raster images (i.e. png, jpg) to
    save space. Scaling is not implemented yet: when the option is set, a
    message is logged and the image is embedded at its original size.

    *Note:* To embed images after conversion to HTML you can also use the
    `html_embed` exporter
    """

    embed_images = Bool(False, help="Embed images as attachment").tag(config=True)
    embed_remote_images = Bool(False, help="Embed images referenced by an url as attachment").tag(config=True)
    dpi_scaling = Float(0, help="Resize images to a certain DPI number (reduce size)").tag(config=True)

    # Matches ``![alt](target)``. The alt text stops at the first ``]`` and the
    # target at the first ``)`` so that several images in one cell are matched
    # one by one instead of being swallowed by a single greedy match.
    # References containing a double quote (e.g. with a title) are not matched.
    _image_regex = re.compile(r'!\[([^\]"]*)\]\(([^)"]+)\)')

    # File extensions whose registered MIME subtype differs from the extension.
    _mime_subtypes = {'jpg': 'jpeg', 'svg': 'svg+xml'}

    def preprocess(self, nb, resources):
        """Run the preprocessor only if ``embed_images`` is enabled.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process.

        Returns
        -------
        The ``(nb, resources)`` pair, untouched when the preprocessor is
        disabled.
        """
        if self.embed_images:
            nb, resources = super().preprocess(nb, resources)
        return nb, resources

    def replfunc_md(self, match):
        """Read image and store as base64 encoded attachment.

        Used as replacement callback for the image regex. ``match.group(1)``
        is the alt text and ``match.group(2)`` the image target. Relies on
        ``self.path`` and ``self.attachments`` being set by
        :meth:`preprocess_cell`.

        Returns the replacement text: the rewritten ``attachment:`` reference
        when the image was embedded, otherwise the matched text unchanged.
        Errors from reading the file or fetching the url are not caught.
        """
        alt_text = match.group(1)
        url = match.group(2)

        # Already an attachment reference: nothing to embed.
        if url.startswith('attachment:'):
            return match.group(0)

        if url.startswith(('http://', 'https://')):
            if not self.embed_remote_images:
                # group(0) is the matched reference only; match.string would
                # be the whole cell source.
                return match.group(0)
            with urlopen(url) as response:
                data = response.read()
        else:
            filename = os.path.join(self.path, url)
            with open(filename, 'rb') as f:
                data = f.read()

        imgformat = url.split('.')[-1].lower()

        if self.dpi_scaling > 0 and imgformat in ('png', 'jpg'):
            # TODO: scale image. Until this is implemented the data is
            # embedded as read, instead of failing the conversion.
            self.log.info("dpi_scaling is not implemented yet, embedding %s unscaled", url)

        self.log.debug("embedding url: %s, format: %s", url, imgformat)
        mime_type = 'image/' + self._mime_subtypes.get(imgformat, imgformat)
        b64_data = base64.b64encode(data).decode("utf-8")
        self.attachments[url] = {mime_type: b64_data}

        return '![' + alt_text + '](attachment:' + url + ')'

    def preprocess_cell(self, cell, resources, index):
        """
        Preprocess cell

        Embeds the images referenced in a markdown cell; other cell types are
        returned unchanged.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed (see base.py)
        """
        # Local image names are resolved relative to this path; fall back to
        # the current directory when the resources carry no path.
        self.path = resources.get('metadata', {}).get('path', '')
        self.attachments = Struct()

        if cell.cell_type == "markdown":
            cell.source = self._image_regex.sub(self.replfunc_md, cell.source)
            if self.attachments:
                # Merge into the attachments the cell already has, so that
                # previously attached images are not lost.
                merged = dict(cell.get('attachments', {}))
                merged.update(self.attachments)
                cell.attachments = merged
        return cell, resources

0 commit comments

Comments
 (0)