|
| 1 | +import io |
1 | 2 | import mimetypes
|
2 | 3 | import os.path
|
3 | 4 |
|
| 5 | +import pygments.lexers |
| 6 | +import pygments.lexers.special |
| 7 | +import fluffy_code.code |
| 8 | +import fluffy_code.prebuilt_styles |
| 9 | +from identify import identify |
4 | 10 | from starlette.applications import Starlette
|
5 | 11 | from starlette.staticfiles import StaticFiles
|
6 | 12 | from starlette.requests import Request
|
|
16 | 22 | PACKAGE_TYPE_NOT_SUPPORTED_ERROR = (
|
17 | 23 | "Sorry, this package type is not yet supported (only .zip and .whl supported currently)."
|
18 | 24 | )
|
# Largest file size (in bytes) that is still rendered with syntax
# highlighting; longer text files get a link to the raw file instead.
TEXT_RENDER_FILESIZE_LIMIT = 20 * 1024  # 20 KiB

# Mime types which are allowed to be presented as detected.
# Entries are prefixes and are matched with ``str.startswith``, so this must
# stay a tuple and "audio/" covers every audio subtype.
# TODO: I think we actually only need to prevent text/html (and any HTML
# variants like XHTML)?
MIME_WHITELIST = (
    'application/javascript',
    'application/json',
    'application/pdf',
    'application/x-ruby',
    'audio/',
    'image/',
    'text/css',
    # RFC 9239 registers text/javascript as the JavaScript media type, and
    # newer `mimetypes` tables report it for .js files instead of
    # application/javascript, so both spellings are accepted.
    'text/javascript',
    'text/plain',
    'text/x-python',
    'text/x-sh',
    'video/',
)

# Mime types which should be displayed inline in the browser, as opposed to
# being downloaded. This is used to populate the Content-Disposition header.
# Only binary MIMEs need to be whitelisted here, since detected non-binary
# files are always inline.
INLINE_DISPLAY_MIME_WHITELIST = (
    'application/pdf',
    'audio/',
    'image/',
    'video/',
)
19 | 54 |
|
20 | 55 |
|
21 | 56 | install_root = os.path.dirname(__file__)
|
@@ -124,17 +159,100 @@ async def package_file_archive_path(request: Request) -> Response:
|
124 | 159 | status_code=404,
|
125 | 160 | )
|
126 | 161 | entry = matching_entries[0]
|
| 162 | + mimetype, _ = mimetypes.guess_type(archive_path) |
127 | 163 |
|
128 |
| - async def transfer_file(): |
129 |
| - async with package.open_from_archive(archive_path) as f: |
130 |
| - data = None |
131 |
| - while data is None or len(data) > 0: |
132 |
| - data = await f.read(1024) |
133 |
| - yield data |
def _transfer_raw():
    """Return the file verbatim as a streaming response.

    The guessed MIME type is forwarded only when it matches
    ``MIME_WHITELIST``. For any other type, and for files whose type could
    not be guessed at all, no media type is passed to the response.
    """
    async def transfer_file():
        # Stream the archive member in 1 KiB chunks; the loop ends after the
        # first empty read, which is still yielded as the final chunk.
        async with package.open_from_archive(archive_path) as f:
            data = None
            while data is None or len(data) > 0:
                data = await f.read(1024)
                yield data

    # `mimetypes.guess_type` yields None for unknown or missing extensions,
    # so guard before calling `startswith` to avoid an AttributeError.
    if mimetype is not None and mimetype.startswith(MIME_WHITELIST):
        media_type = mimetype
    else:
        # NOTE(review): with no explicit media type the browser may sniff the
        # content; confirm this cannot end up rendering HTML.
        media_type = None

    return StreamingResponse(
        transfer_file(),
        media_type=media_type,
        headers={"Content-Length": str(entry.size)},
    )
| 178 | + |
| 179 | + if "raw" in request.query_params: |
| 180 | + return _transfer_raw() |
| 181 | + |
| 182 | + # There are a few cases to handle here: |
| 183 | + # (1) Reasonable-length text: render syntax highlighted in HTML |
| 184 | + # (2) Extremely long text: don't render, just show error and offer |
| 185 | + # link to the raw file |
| 186 | + # (3) Binary file that browsers can display (e.g. image): render raw |
| 187 | + # (4) Binary file that browsers cannot display (e.g. tarball): don't |
| 188 | + # render, just show warning and link to the raw file |
| 189 | + # |
| 190 | + # Note that except for images, the file extension isn't too useful to |
| 191 | + # determine the actual content since there are lots of files without |
| 192 | + # extensions (and lots of extensions not recognized by `mimetypes`). |
| 193 | + if mimetype is not None and mimetype.startswith(INLINE_DISPLAY_MIME_WHITELIST): |
| 194 | + # Case 3: render binary |
| 195 | + return _transfer_raw() |
| 196 | + |
| 197 | + # Figure out if it looks like text or not. |
| 198 | + async with package.open_from_archive(archive_path) as f: |
| 199 | + first_chunk = await f.read(TEXT_RENDER_FILESIZE_LIMIT) |
| 200 | + |
| 201 | + is_text = identify.is_text(io.BytesIO(first_chunk)) |
| 202 | + |
| 203 | + if is_text: |
| 204 | + if entry.size <= TEXT_RENDER_FILESIZE_LIMIT: |
| 205 | + # Case 1: render syntax-highlighted. |
| 206 | + style_config = fluffy_code.prebuilt_styles.default_style() |
| 207 | + |
| 208 | + try: |
| 209 | + lexer = pygments.lexers.guess_lexer_for_filename( |
| 210 | + archive_path, |
| 211 | + first_chunk, |
| 212 | + ) |
| 213 | + except pygments.lexers.ClassNotFound: |
| 214 | + lexer = pygments.lexers.special.TextLexer() |
| 215 | + |
| 216 | + return templates.TemplateResponse( |
| 217 | + "package_file_archive_path.html", |
| 218 | + { |
| 219 | + "request": request, |
| 220 | + "package": package_name, |
| 221 | + "filename": file_name, |
| 222 | + "archive_path": archive_path, |
| 223 | + "rendered_text": fluffy_code.code.render( |
| 224 | + first_chunk, |
| 225 | + style_config=style_config, |
| 226 | + highlight_config=fluffy_code.code.HighlightConfig( |
| 227 | + lexer=lexer, |
| 228 | + highlight_diff=False, |
| 229 | + ), |
| 230 | + ), |
| 231 | + "extra_css": fluffy_code.code.get_global_css() + "\n" + style_config.css, |
| 232 | + "extra_js": fluffy_code.code.get_global_javascript(), |
| 233 | + }, |
| 234 | + ) |
| 235 | + else: |
| 236 | + # Case 2: too long to syntax highlight. |
| 237 | + return templates.TemplateResponse( |
| 238 | + "package_file_archive_path_cannot_render.html", |
| 239 | + { |
| 240 | + "request": request, |
| 241 | + "package": package_name, |
| 242 | + "filename": file_name, |
| 243 | + "archive_path": archive_path, |
| 244 | + "error": "This file is too long to syntax highlight.", |
| 245 | + }, |
| 246 | + ) |
| 247 | + |
| 248 | + # Case 4: link to binary |
| 249 | + return templates.TemplateResponse( |
| 250 | + "package_file_archive_path_cannot_render.html", |
| 251 | + { |
| 252 | + "request": request, |
| 253 | + "package": package_name, |
| 254 | + "filename": file_name, |
| 255 | + "archive_path": archive_path, |
| 256 | + "error": "This file appears to be a binary.", |
| 257 | + }, |
140 | 258 | )
|
0 commit comments