Skip to content

Commit bd93a3e

Browse files
committed
Now renders files with fluffy-code
1 parent 760769b commit bd93a3e

File tree

6 files changed

+185
-21
lines changed

6 files changed

+185
-21
lines changed

poetry.lock

Lines changed: 16 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pypi_view/app.py

Lines changed: 129 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1+
import io
12
import mimetypes
23
import os.path
34

5+
import pygments.lexers
6+
import pygments.lexers.special
7+
import fluffy_code.code
8+
import fluffy_code.prebuilt_styles
9+
from identify import identify
410
from starlette.applications import Starlette
511
from starlette.staticfiles import StaticFiles
612
from starlette.requests import Request
@@ -16,6 +22,35 @@
1622
PACKAGE_TYPE_NOT_SUPPORTED_ERROR = (
1723
"Sorry, this package type is not yet supported (only .zip and .whl supported currently)."
1824
)
25+
TEXT_RENDER_FILESIZE_LIMIT = 20 * 1024 # 20 KiB
26+
27+
# Mime types which are allowed to be presented as detected.
28+
# TODO: I think we actually only need to prevent text/html (and any HTML
29+
# variants like XHTML)?
30+
MIME_WHITELIST = (
31+
'application/javascript',
32+
'application/json',
33+
'application/pdf',
34+
'application/x-ruby',
35+
'audio/',
36+
'image/',
37+
'text/css',
38+
'text/plain',
39+
'text/x-python',
40+
'text/x-sh',
41+
'video/',
42+
)
43+
44+
# Mime types which should be displayed inline in the browser, as opposed to
45+
# being downloaded. This is used to populate the Content-Disposition header.
46+
# Only binary MIMEs need to be whitelisted here, since detected non-binary
47+
# files are always inline.
48+
INLINE_DISPLAY_MIME_WHITELIST = (
49+
'application/pdf',
50+
'audio/',
51+
'image/',
52+
'video/',
53+
)
1954

2055

2156
install_root = os.path.dirname(__file__)
@@ -124,17 +159,100 @@ async def package_file_archive_path(request: Request) -> Response:
124159
status_code=404,
125160
)
126161
entry = matching_entries[0]
162+
mimetype, _ = mimetypes.guess_type(archive_path)
127163

128-
async def transfer_file():
129-
async with package.open_from_archive(archive_path) as f:
130-
data = None
131-
while data is None or len(data) > 0:
132-
data = await f.read(1024)
133-
yield data
164+
def _transfer_raw():
    """Return the file verbatim as a streaming response.

    The archive member is read up to 1024 units at a time and streamed to
    the client. The guessed MIME type is only passed through when it starts
    with one of the MIME_WHITELIST prefixes; otherwise (including when no
    type could be guessed at all) no media type is passed to the response.
    """
    async def transfer_file():
        async with package.open_from_archive(archive_path) as f:
            # An empty read ends the stream; the empty chunk is not emitted.
            while chunk := await f.read(1024):
                yield chunk

    # mimetypes.guess_type() yields None for unrecognised extensions, so
    # guard before calling startswith() to avoid an AttributeError.
    if mimetype is not None and mimetype.startswith(MIME_WHITELIST):
        media_type = mimetype
    else:
        # NOTE(review): no explicit media type is sent in this case; confirm
        # that clients cannot content-sniff the body as HTML.
        media_type = None

    return StreamingResponse(
        transfer_file(),
        media_type=media_type,
        headers={"Content-Length": str(entry.size)},
    )
178+
179+
if "raw" in request.query_params:
180+
return _transfer_raw()
181+
182+
# There are a few cases to handle here:
183+
# (1) Reasonable-length text: render syntax highlighted in HTML
184+
# (2) Extremely long text: don't render, just show error and offer
185+
# link to the raw file
186+
# (3) Binary file that browsers can display (e.g. image): render raw
187+
# (4) Binary file that browsers cannot display (e.g. tarball): don't
188+
# render, just show warning and link to the raw file
189+
#
190+
# Note that except for images, the file extension isn't too useful to
191+
# determine the actual content since there are lots of files without
192+
# extensions (and lots of extensions not recognized by `mimetypes`).
193+
if mimetype is not None and mimetype.startswith(INLINE_DISPLAY_MIME_WHITELIST):
194+
# Case 3: render binary
195+
return _transfer_raw()
196+
197+
# Figure out if it looks like text or not.
198+
async with package.open_from_archive(archive_path) as f:
199+
first_chunk = await f.read(TEXT_RENDER_FILESIZE_LIMIT)
200+
201+
is_text = identify.is_text(io.BytesIO(first_chunk))
202+
203+
if is_text:
204+
if entry.size <= TEXT_RENDER_FILESIZE_LIMIT:
205+
# Case 1: render syntax-highlighted.
206+
style_config = fluffy_code.prebuilt_styles.default_style()
207+
208+
try:
209+
lexer = pygments.lexers.guess_lexer_for_filename(
210+
archive_path,
211+
first_chunk,
212+
)
213+
except pygments.lexers.ClassNotFound:
214+
lexer = pygments.lexers.special.TextLexer()
215+
216+
return templates.TemplateResponse(
217+
"package_file_archive_path.html",
218+
{
219+
"request": request,
220+
"package": package_name,
221+
"filename": file_name,
222+
"archive_path": archive_path,
223+
"rendered_text": fluffy_code.code.render(
224+
first_chunk,
225+
style_config=style_config,
226+
highlight_config=fluffy_code.code.HighlightConfig(
227+
lexer=lexer,
228+
highlight_diff=False,
229+
),
230+
),
231+
"extra_css": fluffy_code.code.get_global_css() + "\n" + style_config.css,
232+
"extra_js": fluffy_code.code.get_global_javascript(),
233+
},
234+
)
235+
else:
236+
# Case 2: too long to syntax highlight.
237+
return templates.TemplateResponse(
238+
"package_file_archive_path_cannot_render.html",
239+
{
240+
"request": request,
241+
"package": package_name,
242+
"filename": file_name,
243+
"archive_path": archive_path,
244+
"error": "This file is too long to syntax highlight.",
245+
},
246+
)
247+
248+
# Case 4: link to binary
249+
return templates.TemplateResponse(
250+
"package_file_archive_path_cannot_render.html",
251+
{
252+
"request": request,
253+
"package": package_name,
254+
"filename": file_name,
255+
"archive_path": archive_path,
256+
"error": "This file appears to be a binary.",
257+
},
140258
)

pypi_view/templates/base.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
<html>
33
<head>
44
<link rel="stylesheet" href="{{url_for('static', path='/site.css')}}" />
5+
{% block extra_head %}{% endblock %}
56
</head>
67
<body class="page-{{page}}">
78
{% block content %}{% endblock %}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{% set page = 'package-file-archive-path' %}
2+
{% extends 'base.html' %}
3+
4+
{% block extra_head %}
5+
<style>
6+
{{extra_css|safe}}
7+
</style>
8+
{% endblock %}
9+
10+
{% block extra_js %}
11+
<script>
12+
{{extra_js|safe}}
13+
</script>
14+
{% endblock %}
15+
16+
{% block content %}
17+
<h1>{{package}}: {{filename}}</h1>
18+
<h2>{{archive_path}}</h2>
19+
{{rendered_text|safe}}
20+
<p><a href="?raw">View Raw</a></p>
21+
{% endblock %}
22+
23+
{# vim: ft=jinja
24+
#}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{% set page = 'package-file-archive-path' %}
2+
{% extends 'base.html' %}
3+
4+
{% block content %}
5+
<h1>{{package}}: {{filename}}</h1>
6+
<h2>{{archive_path}}</h2>
7+
<p>{{error}}</p>
8+
<p><a href="?raw">View Raw</a></p>
9+
{% endblock %}
10+
11+
{# vim: ft=jinja
12+
#}

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ packages = [{include = "pypi_view"}]
1010
[tool.poetry.dependencies]
1111
python = "^3.9"
1212
starlette = "*"
13-
fluffy-code = "^0.0.0"
13+
fluffy-code = "^0.0.1"
1414
Jinja2 = "^3.1.2"
1515
httpx = "^0.23.0"
1616
aiofiles = "^22.1.0"
17+
identify = "^2.5.5"
18+
Pygments = "^2.13.0"
1719

1820

1921
[tool.poetry.group.dev.dependencies]

0 commit comments

Comments
 (0)