|
| 1 | +# ----------------------------------------------------------------------------- |
| 2 | +# Copyright (C) Jupyter Development Team |
| 3 | +# |
| 4 | +# Distributed under the terms of the BSD License. The full license is in |
| 5 | +# the file COPYING, distributed as part of this software. |
| 6 | +# ----------------------------------------------------------------------------- |
| 7 | +import errno |
| 8 | +import io |
| 9 | +import os |
| 10 | +from datetime import datetime |
| 11 | +from urllib.parse import urlparse |
| 12 | +import boto3 |
| 13 | +import botocore |
| 14 | +from tornado import web |
| 15 | + |
| 16 | +from .. import _load_handler_from_location |
| 17 | +from ...utils import url_path_join |
| 18 | +from ..base import cached |
| 19 | +from ..base import RenderingHandler |
| 20 | + |
| 21 | + |
class S3Handler(RenderingHandler):
    """Renderer for s3://

    Serves notebooks stored in S3 buckets. Every ``path`` argument below is a
    full ``s3://<bucket>/<key>`` URI.
    """

    def initialize(self, **kwargs):
        """Create the S3 client and reset the downloaded-notebook cache."""
        # NOTE(review): the boto3 calls made through this client are
        # synchronous, so they run inline inside the ``async`` methods below.
        self.s3_client = boto3.client("s3")
        # Text of the most recently read object and the URI it was read from,
        # so download() can reuse what read_s3_file() already fetched.
        self._downloadable_data = None
        self._downloaded_path = None
        super().initialize(**kwargs)

    @staticmethod
    def _split_s3_uri(path):
        """Split ``s3://bucket/some/key`` into ``("bucket", "some/key")``."""
        parsed = urlparse(path)
        # urlparse keeps the leading "/" on the path component; S3 keys do
        # not start with it.
        return parsed.netloc, parsed.path[1:]

    async def download(self, path):
        """Send the notebook at ``path`` to the client as an attachment.

        Sets ``Content-Length`` from the object's S3 metadata and
        ``Content-Disposition`` from the URI's basename, then writes the
        notebook body. Returns silently if the client closes the connection.
        """
        # Imported here because this module only imports ``web`` from tornado;
        # without it the ``except`` clause below raises NameError.
        from tornado import iostream

        headers = await self.get_notebook_headers(path)
        filename = os.path.basename(path)
        self.set_header("Content-Length", headers["ContentLength"])
        # Escape commas to workaround Chrome issue with commas in download filenames
        self.set_header(
            "Content-Disposition",
            "attachment; filename={};".format(filename.replace(",", "_")),
        )
        if self._downloaded_path == path and self._downloadable_data is not None:
            content = self._downloadable_data
        else:
            content = await self.read_s3_file(path)

        # read_s3_file() returns decoded text. A str, like bytes, must be sent
        # as a single chunk: iterating it would yield one character at a time
        # and trigger a write + flush per character.
        if isinstance(content, (bytes, str)):
            content = [content]
        for chunk in content:
            try:
                self.write(chunk)
                await self.flush()
            except iostream.StreamClosedError:
                # The client went away; nothing more can be sent.
                return

    async def get_notebook_data(self, path):
        """Serve a download if requested, otherwise pass ``path`` through.

        Returns
        =======
        ``None`` when the ``download`` query argument is present (the file
        has already been written to the response), else ``path`` unchanged.
        """
        is_download = self.get_query_arguments("download")
        if is_download:
            await self.download(path)
            return

        return path

    async def get_notebook_headers(self, path):
        """Return the S3 ``head_object`` response for the notebook at ``path``.

        Raises
        ======
        tornado.web.HTTPError
            404 when S3 reports error code ``"404"`` for the object.
        botocore.exceptions.ClientError
            Any other S3 client error, re-raised unchanged.
        """
        bucket, key = self._split_s3_uri(path)
        self.log.debug("Getting headers for %s from %s", key, bucket)
        try:
            head = self.s3_client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "404":
                self.log.info("The notebook %s does not exist.", path)
                raise web.HTTPError(404)
            raise
        return head

    async def read_s3_file(self, path):
        """Download the notebook at ``path`` from S3 and return it as text.

        The object is read fully into memory and decoded as UTF-8. The text
        and ``path`` are remembered on the handler so a later download() of
        the same URI does not fetch the object again.

        Raises
        ======
        tornado.web.HTTPError
            404 when S3 reports error code ``"404"`` for the object.
        botocore.exceptions.ClientError
            Any other S3 client error, re-raised unchanged.
        """
        bucket, key = self._split_s3_uri(path)
        s3_file = io.BytesIO()
        self.log.debug("Reading %s from %s", key, bucket)
        try:
            self.s3_client.download_fileobj(bucket, key, s3_file)
        except botocore.exceptions.ClientError as ex:
            if ex.response["Error"]["Code"] == "404":
                self.log.info("The notebook %s does not exist.", path)
                raise web.HTTPError(404)
            raise
        self.log.debug("Done downloading.")
        self._downloadable_data = s3_file.getvalue().decode("utf-8")
        self._downloaded_path = path
        return self._downloadable_data

    async def deliver_notebook(self, path):
        """Read the notebook at ``path`` from S3 and render it."""
        nbdata = await self.read_s3_file(path)

        # Explanation of some kwargs passed into `finish_notebook`:
        # breadcrumbs: list of dict
        #     Breadcrumb 'name' and 'url' to render as links at the top of the notebook page
        # title: str
        #     Title to use as the HTML page title (i.e., text on the browser tab)
        await self.finish_notebook(
            nbdata,
            download_url="?download",
            msg="file from s3: %s" % path,
            public=False,
            breadcrumbs=[],
            title=os.path.basename(path),
        )

    @cached
    async def get(self, path):
        """Get an s3 notebook

        Parameters
        ==========
        path: str
            s3 uri
        """
        fullpath = await self.get_notebook_data(path)

        # get_notebook_data returns None when the notebook was sent as a
        # download, i.e. when no notebook is supposed to be rendered, making
        # deliver_notebook inappropriate.
        if fullpath is not None:
            await self.deliver_notebook(fullpath)
| 138 | + |
| 139 | + |
| 140 | + |
def default_handlers(handlers=[], **handler_names):
    """Return ``handlers`` with the S3 route appended.

    The handler class is loaded from the location given by the required
    ``s3_handler`` keyword argument. The route pattern matches request paths
    that begin with ``s3%3A//``. The ``handlers`` argument is not modified.
    """
    handler_location = handler_names["s3_handler"]
    s3_route = (r"/(s3%3A//.*)", _load_handler_from_location(handler_location), {})
    return handlers + [s3_route]
| 147 | + |
def uri_rewrites(rewrites=None):
    """Return ``rewrites`` with the ``s3://`` URI rewrite rule appended.

    Parameters
    ==========
    rewrites: list of (pattern, template) tuples, optional
        Rewrite rules collected so far. They are kept, in order, ahead of the
        S3 rule; the argument itself is never modified. ``None`` (the
        default) means no earlier rules.

    Returns
    =======
    A new list ending with the rule that maps any ``s3://...`` URI to itself.
    """
    # Previously the incoming rules were dropped, unlike default_handlers(),
    # which extends the list it is given.
    earlier_rules = [] if rewrites is None else list(rewrites)
    return earlier_rules + [
        (r"^(s3://.*)$", "{0}"),
    ]
| 152 | + |
0 commit comments