Skip to content

Commit dbfab61

Browse files
committed
Added s3 provider
1 parent d9c5da7 commit dbfab61

File tree

5 files changed

+163
-2
lines changed

5 files changed

+163
-2
lines changed

nbviewer/app.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ class NBViewer(Application):
201201
default_value="nbviewer.providers.local.handlers.LocalFileHandler",
202202
help="The Tornado handler to use for viewing notebooks found on a local filesystem",
203203
).tag(config=True)
204+
s3_handler = Unicode(
205+
default_value="nbviewer.providers.s3.handlers.S3Handler",
206+
help="The Tornado handler to use for viewing notebooks from amazon S3",
207+
).tag(config=True)
204208
url_handler = Unicode(
205209
default_value="nbviewer.providers.url.handlers.URLHandler",
206210
help="The Tornado handler to use for viewing notebooks accessed via URL",
@@ -625,6 +629,7 @@ def init_tornado_application(self):
625629
github_user_handler=self.github_user_handler,
626630
index_handler=self.index_handler,
627631
local_handler=self.local_handler,
632+
s3_handler=self.s3_handler,
628633
url_handler=self.url_handler,
629634
user_gists_handler=self.user_gists_handler,
630635
)

nbviewer/providers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
# -----------------------------------------------------------------------------
77

88
# Provider packages enabled by default, in registration order.
default_providers = [
    f"nbviewer.providers.{name}" for name in ("url", "github", "gist", "s3")
]

# Providers contributing uri_rewrites, in the order their rewrites are applied.
default_rewrites = [
    f"nbviewer.providers.{name}"
    for name in ("gist", "github", "dropbox", "url", "s3")
]
1616

1717

nbviewer/providers/s3/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .handlers import default_handlers
2+
from .handlers import S3Handler
3+
from .handlers import uri_rewrites

nbviewer/providers/s3/handlers.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# -----------------------------------------------------------------------------
2+
# Copyright (C) Jupyter Development Team
3+
#
4+
# Distributed under the terms of the BSD License. The full license is in
5+
# the file COPYING, distributed as part of this software.
6+
# -----------------------------------------------------------------------------
7+
import errno
import io
import os
from datetime import datetime
from urllib.parse import urlparse

import boto3
import botocore
from tornado import iostream
from tornado import web

from .. import _load_handler_from_location
from ..base import cached
from ..base import RenderingHandler
from ...utils import url_path_join
20+
21+
22+
class S3Handler(RenderingHandler):
    """Renderer for s3://

    Serving notebooks from S3 buckets.
    """

    def initialize(self, **kwargs):
        """Create the S3 client and the per-request download cache."""
        self.s3_client = boto3.client("s3")
        # Cache the last notebook fetched by this handler so that a render
        # followed by a ?download of the same path only hits S3 once.
        self._downloadable_data = None
        self._downloaded_path = None
        super().initialize(**kwargs)

    async def download(self, path):
        """Stream the notebook at *path* to the client as a file attachment."""
        headers = await self.get_notebook_headers(path)
        filename = os.path.basename(path)
        self.set_header("Content-Length", headers["ContentLength"])
        # Escape commas to workaround Chrome issue with commas in download filenames
        self.set_header(
            "Content-Disposition",
            "attachment; filename={};".format(filename.replace(",", "_")),
        )
        if self._downloaded_path == path and self._downloadable_data is not None:
            content = self._downloadable_data
        else:
            content = await self.read_s3_file(path)

        if isinstance(content, bytes):
            content = [content]
        for chunk in content:
            try:
                self.write(chunk)
                await self.flush()
            except iostream.StreamClosedError:
                # Client disconnected mid-download; nothing left to send.
                return

    async def get_notebook_data(self, path):
        """Return *path* for rendering, or None if it was served as a download."""
        is_download = self.get_query_arguments("download")
        if is_download:
            await self.download(path)
            return

        return path

    async def get_notebook_headers(self, path):
        """Return the S3 HEAD metadata (including size) for the notebook at *path*.

        Raises
        ------
        tornado.web.HTTPError
            404 if the object does not exist in the bucket.
        """
        o = urlparse(path)
        bucket = o.netloc
        # Strip the leading "/" so the key matches the object name in the bucket.
        key = o.path[1:]
        self.log.debug("Getting headers for %s from %s", key, bucket)
        try:
            head = self.s3_client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "404":
                self.log.info("The notebook %s does not exist.", path)
                raise web.HTTPError(404)
            # Bare raise preserves the original traceback.
            raise
        return head

    async def read_s3_file(self, path):
        """Download the notebook at *path* from S3 and return it as text.

        Raises
        ------
        tornado.web.HTTPError
            404 if the object does not exist in the bucket.
        """
        o = urlparse(path)
        bucket = o.netloc
        key = o.path[1:]
        s3_file = io.BytesIO()
        self.log.debug("Reading %s from %s", key, bucket)
        try:
            self.s3_client.download_fileobj(bucket, key, s3_file)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "404":
                self.log.info("The notebook %s does not exist.", path)
                raise web.HTTPError(404)
            raise
        s3_file.seek(0)
        self.log.debug("Done downloading.")
        # Notebooks are JSON text; decode once and keep a copy so a subsequent
        # ?download of the same path skips a second fetch.
        self._downloadable_data = s3_file.read().decode("utf-8")
        self._downloaded_path = path
        return self._downloadable_data

    async def deliver_notebook(self, path):
        """Fetch the notebook at *path* and render it to the client."""
        nbdata = await self.read_s3_file(path)

        # Explanation of some kwargs passed into `finish_notebook`:
        # breadcrumbs: list of dict
        #     Breadcrumb 'name' and 'url' to render as links at the top of the notebook page
        # title: str
        #     Title to use as the HTML page title (i.e., text on the browser tab)
        await self.finish_notebook(
            nbdata,
            download_url="?download",
            msg="file from s3: %s" % path,
            public=False,
            breadcrumbs=[],
            title=os.path.basename(path),
        )

    @cached
    async def get(self, path):
        """Get an s3 notebook

        Parameters
        ==========
        path: str
            s3 uri
        """
        fullpath = await self.get_notebook_data(path)

        # get_notebook_data returns None if a directory is to be shown or a notebook is to be downloaded,
        # i.e. if no notebook is supposed to be rendered, making deliver_notebook inappropriate
        if fullpath is not None:
            await self.deliver_notebook(fullpath)
138+
139+
140+
141+
def default_handlers(handlers=[], **handler_names):
    """Tornado handlers"""

    # Resolve the configured handler class (dotted path) to the actual class.
    handler_cls = _load_handler_from_location(handler_names["s3_handler"])
    # The whole URL-encoded s3 URI (s3%3A//bucket/key) is captured as the path.
    s3_routes = [(r"/(s3%3A//.*)", handler_cls, {})]
    return handlers + s3_routes
147+
148+
def uri_rewrites(rewrites=[]):
    """URI rewrites: pass s3:// URIs through unchanged.

    Appends to *rewrites* rather than replacing it — the provider rewrite
    chain accumulates each provider's entries, and returning a fresh list
    here would silently drop every other provider's rewrites.
    """
    return rewrites + [
        (r"^(s3://.*)$", "{0}"),
    ]
152+

requirements.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
elasticsearch
22
ipython
3+
boto3
34
jupyter_client
45
jupyter_server>=0.2.0
56
markdown>=3.0,==3.1.1 # pin until we workaround #909, which is a regression in 3.2

0 commit comments

Comments
 (0)