Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 34 additions & 52 deletions airlock/renderers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,65 +6,43 @@
import re
from dataclasses import dataclass
from email.utils import formatdate
from functools import cached_property
from io import BytesIO, StringIO
from pathlib import Path
from typing import IO, Any, ClassVar, Self, cast
from typing import IO, Any, ClassVar, cast

from ansi2html import Ansi2HTMLConverter
from django.conf import settings
from django.http import FileResponse, HttpResponseBase
from django.template import Template, loader
from django.template.response import SimpleTemplateResponse
from django.utils.safestring import mark_safe

from airlock.types import UrlPath
from airlock.utils import is_valid_file_type
from airlock.utils import is_valid_file_type, truncate_log_stream


@dataclass
class RendererTemplate:
name: str
path: Path
template: Template
content_cache_id: str

@classmethod
def from_name(cls, name: str) -> Self:
template = cls.get_template(name)
content_cache_id = cls.content_key(template)
return cls(
name,
template=template,
path=Path(template.origin.name),
content_cache_id=content_cache_id,
)

@staticmethod
def get_template(name) -> Template:
return cast(Template, loader.get_template(name))

@staticmethod
def content_key(template: Template) -> str:
# loader.get_template() returns a different Template depending on
# which template engine is used. Usually this will be a DjangoTemplates
# engine (django.template.backends.django.Template), but we cast it to the
# publicly exposed django.template.Template. Both versions of Template have
# a .render() method which works for the response, but django.template.Template
# doesn't have a template attribute, which we need for getting the source content
# So we just tell mypy to ignore here.
return hashlib.sha256(template.template.source.encode()).hexdigest() # type: ignore

def reload(self):
return type(self).from_name(self.name)
@property
def template(self) -> Template:
# This is cached by django, so is fast
t = cast(Template, loader.get_template(self.name))

# Calculate the content hash if not already done, and attach it to the
# django template instance. This is a little hacky, but it's a convenient
# way to ensure that the content hash changes if the template has been
# reloaded, which really only happens when DEBUG=True. As our tests run
# with DEBUG=True, this is marked as nocover.
if not hasattr(t, "airlock_content_hash"): # pragma: no cover
t.airlock_content_hash = hashlib.md5(t.template.source.encode()).hexdigest() # type: ignore
return t

def cache_id(self):
# cache the template using its content rather than filesystem data
# Django caches templates by default, so loading the template again
# is cheap
# We don't want to use the template mtime in the cache ID because it
# will change after a deploy, even if the content is the same
template = self.get_template(self.name)
return self.content_key(template)
@property
def content_hash(self):
# type ignore because it's our own monkeypatched attribute
return self.template.airlock_content_hash # type: ignore


@dataclass
Expand Down Expand Up @@ -93,10 +71,6 @@ def from_file(
else:
stream = abspath.open("rb")

# check if this template's content has changed since the TemplateRenderer was loaded
if cls.template and (cls.template.content_cache_id != cls.template.cache_id()):
cls.template = cls.template.reload()

return cls(
stream=stream,
file_cache_id=cache_id,
Expand All @@ -122,6 +96,7 @@ def from_contents(
def get_response(self):
if self.template:
context = self.context()
context.setdefault("filename", self.filename)
response: HttpResponseBase = SimpleTemplateResponse(
self.template.template, context
)
Expand All @@ -136,11 +111,11 @@ def get_response(self):
def context(self):
raise NotImplementedError()

@cached_property
@property
def cache_id(self):
cache_id = self.file_cache_id
if self.template:
cache_id += "-" + self.template.cache_id()
cache_id += "-" + self.template.content_hash

return cache_id

Expand All @@ -158,7 +133,7 @@ def headers(self):


class CSVRenderer(Renderer):
template = RendererTemplate.from_name("file_browser/file_content/csv.html")
template = RendererTemplate("file_browser/file_content/csv.html")
is_text: ClassVar[bool] = True

def context(self):
Expand All @@ -173,23 +148,25 @@ def context(self):


class TextRenderer(Renderer):
template = RendererTemplate.from_name("file_browser/file_content/text.html")
template = RendererTemplate("file_browser/file_content/text.html")
is_text: ClassVar[bool] = True

def context(self):
return {
"text": self.stream.read(),
"class": Path(self.filename).suffix.lstrip("."),
"truncated": False,
}


class InvalidFileRenderer(Renderer):
template = RendererTemplate.from_name("file_browser/file_content/text.html")
template = RendererTemplate("file_browser/file_content/text.html")

def context(self):
return {
"text": f"{self.filename} is not a valid file type and cannot be displayed.",
"class": "",
"truncated": False,
}


Expand All @@ -200,7 +177,10 @@ def context(self):
# We don't need the full HTML file that's produced, so just extract the <pre></pre>
# tag which contains the log content and the inline styles.
conv = Ansi2HTMLConverter()
text = conv.convert(self.stream.read())

# truncate the logs if needed
log, truncated = truncate_log_stream(self.stream, settings.MAX_LOG_BYTES)
text = conv.convert(log)
match = re.match(
r".*(?P<style_tag><style.*</style>).*(?P<pre_tag><pre.*</pre>).*",
text,
Expand All @@ -216,6 +196,8 @@ def context(self):
return {
"text": text,
"class": Path(self.filename).suffix.lstrip("."),
"truncated": truncated,
"limit_kb": settings.MAX_LOG_BYTES // 1000,
}


Expand Down
3 changes: 3 additions & 0 deletions airlock/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,6 @@ def filter(self, record): # pragma: no cover

UPLOAD_DELAY = float(os.environ.get("AIRLOCK_UPLOAD_DELAY", 1))
UPLOAD_RETRY_DELAY = float(os.environ.get("AIRLOCK_UPLOAD_RETRY_DELAY", 60))

# Log files larger than this many bytes are truncated to their last MAX_LOG_BYTES when displayed
MAX_LOG_BYTES = 10_000
31 changes: 31 additions & 0 deletions airlock/templates/file_browser/file_content/content_base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{% load static %}
{% load django_vite %}

<!DOCTYPE html>
<html lang="en" class="min-h-screen">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>{% block metatitle %}Airlock{% endblock metatitle %}</title>

{% block extra_meta %}{% endblock %}

{% vite_hmr_client %}
{% vite_asset "assets/src/scripts/base.js" app="job_server" %}
{% vite_asset "assets/src/scripts/main.js" %}

<link rel="stylesheet" href="{% static 'assets/components.css' %}">
<link rel="stylesheet" href="{% static 'assets/icons.css' %}">

{% block extra_styles %}{% endblock %}

<link rel="icon" href="{% static "favicon.ico" %}">
<link rel="icon" href="{% static "icon.svg" %}" type="image/svg+xml">
</head>

<body class="flex flex-col min-h-screen text-slate bg-white">
{% block content %}{% endblock %}
</body>
</html>
108 changes: 46 additions & 62 deletions airlock/templates/file_browser/file_content/csv.html
Original file line number Diff line number Diff line change
@@ -1,81 +1,65 @@
{% extends "file_browser/file_content/content_base.html" %}
{% load airlock %}
{% load django_vite %}
{% load static %}

<!DOCTYPE html>
<html lang="en" class="min-h-screen">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">

{% vite_hmr_client %}
{% vite_asset "assets/src/scripts/main.js" %}

</head>

<body>

<div id="airlock-table">
{% if use_clusterize_table %}
{% fragment as header_row %}
<tr>
<th class="sort-ascending">
{% block metatitle %}{{ filename }}{% endblock %}
{% block content %}
<div id="airlock-table">
{% if use_clusterize_table %}
{% fragment as header_row %}
<tr>
<th class="sort-ascending">
<button class="clusterize-table-sorter p-2 relative text-left w-full">
<div class="flex flex-row gap-2 items-center">
<span class="sort-icon h-4 w-4 [&_img]:h-4 [&_img]:w-4">
{% datatable_sort_icon %}
</span>
</div>
</button>
</th>
{% for header in headers %}
<th>
<button class="clusterize-table-sorter p-2 relative text-left w-full">
<div class="flex flex-row gap-2 items-center">
{{ header }}
<span class="sort-icon h-4 w-4 [&_img]:h-4 [&_img]:w-4">
{% datatable_sort_icon %}
</span>
</div>
</button>
</th>
{% endfor %}
</tr>
{% endfragment %}
{% #clusterize_table header_row=header_row %}
{% for index, row in rows %}
<tr><td class="datatable-row-number">{{ index }}</td>
{% for cell in row %}<td>{{ cell }}</td>{% endfor %}</tr>
{% endfor %}
{% /clusterize_table %}

{% else %}
<table>
<thead>
<tr>
<th></th>
{% for header in headers %}
<th>
<button class="clusterize-table-sorter p-2 relative text-left w-full">
<div class="flex flex-row gap-2 items-center">
{{ header }}
<span class="sort-icon h-4 w-4 [&_img]:h-4 [&_img]:w-4">
{% datatable_sort_icon %}
</span>
</div>
</button>
{{ header }}
</th>
{% endfor %}
</tr>
{% endfragment %}
{% #clusterize_table header_row=header_row %}
</thead>
<tbody>
{% for index, row in rows %}
<tr><td class="datatable-row-number">{{ index }}</td>
{% for cell in row %}<td>{{ cell }}</td>{% endfor %}</tr>
{% endfor %}
{% /clusterize_table %}

{% else %}
<table>
<thead>
<tr>
<th></th>
{% for header in headers %}
<th>
{{ header }}
</th>
<td>{{ index }}</td>
{% for cell in row %}
<td>{{ cell }}</td>
{% endfor %}
</tr>
</thead>
<tbody>
{% for index, row in rows %}
<tr>
<td>{{ index }}</td>
{% for cell in row %}
<td>{{ cell }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
</table>
{% endif %}

</div>
</body>
{% endfor %}
</tbody>
</table>
{% endif %}

</html>
</div>
{% endblock %}
14 changes: 11 additions & 3 deletions airlock/templates/file_browser/file_content/text.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
<pre class="{{ class }}">
{{ text }}
</pre>
{% extends "file_browser/file_content/content_base.html" %}
{% block metatitle %}{{ filename }}{% endblock %}
{% block content %}
{% if truncated|default:False %}
{% #alert variant="info" title="Log truncated" class="mt-4" no_icon=True %}
This log file is larger than the log viewing limit of {{ limit_kb }}kb, so we have only shown you the last {{ limit_kb }}kb.
If you need to see more of the log, contact OpenSAFELY tech support.
{% /alert %}
{% endif %}
<pre class="{{ class }}">{{ text }}</pre>
{% endblock %}
19 changes: 19 additions & 0 deletions airlock/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from typing import IO

from pipeline.constants import LEVEL4_FILE_TYPES

Expand All @@ -7,3 +8,21 @@

def is_valid_file_type(path: Path | UrlPath):
return not path.name.startswith(".") and path.suffix in LEVEL4_FILE_TYPES


def truncate_log_stream(stream: IO[str], n: int) -> tuple[str, bool]:
    """Return the tail of a log stream, limited to at most ``n`` characters.

    The whole stream is read into memory and then sliced; the limit is
    measured in characters of the decoded text (``len()`` of the ``str``),
    which only equals a byte count for single-byte encodings such as ASCII.

    If the content had to be cut, a leading partial line is dropped so the
    result starts on a line boundary. The exception is when dropping it
    would leave nothing (the tail holds a single line): the partial line is
    then kept so that something is still shown.

    Args:
        stream: text stream to read; it is consumed to the end.
        n: maximum number of characters to return. Values ``<= 0`` yield an
            empty string.

    Returns:
        A ``(text, truncated)`` tuple, where ``truncated`` is True if any
        content was removed.
    """
    contents = stream.read()

    if len(contents) <= n:
        return contents, False

    if n <= 0:
        # contents[-0:] would be the *whole* string and a negative n would
        # slice from the wrong end, so handle a non-positive limit explicitly.
        return "", bool(contents)

    tail = contents[-n:]

    # len(contents) > n here, so the character just before the tail exists.
    # If it is a newline, the cut fell exactly on a line boundary and the
    # tail already starts with a complete line: nothing to strip.
    if contents[-n - 1] == "\n":
        return tail, True

    first_newline = tail.find("\n")
    # Only strip the leading partial line when something follows it, i.e.
    # the first newline is not the very last character. When there is no
    # newline at all, find() returns -1 and the slice keeps the whole tail.
    if first_newline != len(tail) - 1:
        tail = tail[first_newline + 1 :]
    return tail, True
Loading