Skip to content

Commit 867d7f6

Browse files
committed
Sanitize HTML (e.g. by removing script tags)
1 parent 58cb595 commit 867d7f6

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

data_hub_api/docmaps/v2/provider.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from time import monotonic
66
from typing import Iterable, Optional, Sequence, cast
77

8+
import nh3
89
import objsize
910
from data_hub_api.docmaps.v2.codecs.docmaps import get_docmap_item_for_query_result_item
1011
from data_hub_api.docmaps.v2.api_input_typing import ApiInput
@@ -26,7 +27,9 @@
2627
def get_html_formatted_evaluation_content(
2728
evaluation_content: str
2829
) -> str:
29-
return markdown(evaluation_content)
30+
html = markdown(evaluation_content)
31+
sanitized_html = nh3.clean(html, link_rel=None)
32+
return sanitized_html
3033

3134

3235
class DocmapsProvider:

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ objsize==0.8.0
33
uvicorn[standard]==0.39.0
44
google-cloud-bigquery==3.40.1
55
Markdown==3.9
6+
nh3==0.3.3
67
pandas==2.3.3
78
numpy==2.0.2
89
types-python-dateutil

tests/unit_tests/docmaps/v2/provider_test.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ def test_should_convert_markdown_link_to_html_link(self):
5252
'[link text](https://www.example.com)'
5353
) == '<p><a href="https://www.example.com">link text</a></p>'
5454

55+
def test_should_remove_script_tags_from_markdown(self):
56+
assert get_html_formatted_evaluation_content(
57+
'cleaned <script>var bad = true;</script> html'
58+
) == '<p>cleaned  html</p>'
59+
5560

5661
class TestDocmapsProvider:
5762
def test_should_create_index_with_non_empty_docmaps(

0 commit comments

Comments
 (0)