Skip to content

Commit ed58790

Browse files
committed
Clean up description via HTML escape
Because the description is taken from the raw text of the document, it can contain HTML reserved characters. This change escapes the description string so that those characters are emitted as HTML entities instead.
1 parent 1da6c73 commit ed58790

File tree

4 files changed

+29
-0
lines changed

4 files changed

+29
-0
lines changed

sphinxext/opengraph/descriptionparser.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import html
12
import string
23
from typing import Iterable
34

@@ -72,6 +73,9 @@ def dispatch_visit(self, node: nodes.Element) -> None:
7273
if len(node.children) == 0:
7374
text = node.astext().replace("\r", "").replace("\n", " ").strip()
7475

76+
# Ensure string contains HTML-safe characters
77+
text = html.escape(text, True)
78+
7579
# Remove double spaces
7680
while text.find(" ") != -1:
7781
text = text.replace(" ", " ")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
extensions = ["sphinxext.opengraph"]
2+
3+
master_doc = "index"
4+
exclude_patterns = ["_build"]
5+
6+
html_theme = "basic"
7+
8+
ogp_site_url = "http://example.org/en/latest/"
9+
10+
enable_meta_description = True
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lorem <ipsum> dolor sit amet, "consectetur" adipiscing elit. Suspendisse at lorem ornare, fringilla massa nec, venenatis mi. Donec erat sapien, tincidunt nec rhoncus nec, scelerisque id diam. Orci varius natoque penatibus et magnis dis parturient mauris.

tests/test_options.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ def get_tag_content(tags, tag_type, kind="property", prefix="og"):
1212
return get_tag(tags, tag_type, kind, prefix).get("content", "")
1313

1414

15+
def get_tag_content_text(tags, tag_type, kind="property", prefix="og"):
    """Return the text contained in a specific ogp tag.

    Looks the tag up with ``get_tag`` and returns the text of its children,
    not its ``content`` attribute (use ``get_tag_content`` for that).
    """
    # NOTE(review): presumably ``get_tag`` returns a BeautifulSoup ``Tag``,
    # whose ``get_text(separator, strip)`` takes a join separator and a strip
    # flag -- not an attribute name and a default like ``Tag.get`` does. The
    # previous ``get_text("content", "")`` would therefore have joined text
    # nodes with the literal string "content"; calling it with no arguments
    # keeps the default empty separator and no stripping.
    return get_tag(tags, tag_type, kind, prefix).get_text()
18+
19+
1520
def get_meta_description(tags):
1621
return [tag for tag in tags if tag.get("name") == "description"][0].get(
1722
"content", ""
@@ -39,6 +44,15 @@ def test_meta_name_description(meta_tags):
3944
assert description == og_description
4045

4146

47+
@pytest.mark.sphinx("html", testroot="meta-name-description-escape")
def test_meta_name_description_escape(meta_tags):
    """Check the og:description of a document containing HTML reserved characters.

    Named distinctly from ``test_meta_name_description``: defining two
    functions with the same name in one module rebinds the name, so only the
    later definition would be collected and the earlier test would silently
    stop running.
    """
    og_description = get_tag_content(meta_tags, "description")
    og_description_text = get_tag_content_text(meta_tags, "description")

    # The "content" attribute, as read back by get_tag_content, is expected to
    # still contain the "<" character.
    assert '<' in og_description
    # NOTE(review): for an empty element such as <meta> the tag text is
    # presumably always empty, which would make this assertion pass regardless
    # of escaping -- confirm it checks what is intended.
    assert '<' not in og_description_text
54+
55+
4256
@pytest.mark.sphinx("html", testroot="meta-name-description-manual-description")
4357
def test_meta_name_manual_description(meta_tags):
4458
og_description = get_tag_content(meta_tags, "description")

0 commit comments

Comments
 (0)