From 525d7377e8c036302a065cf5f22473d97a5a0abe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnter=20Milde?= Date: Mon, 8 Dec 2025 20:38:55 +0100 Subject: [PATCH 1/2] Unify handling of URIs with schemes that are not in the whitelist. Treat "off-list" schemes as part of a local wiki item name, so that page names like "Parrots: blue or green" are easy to access. Add check for safe URI scheme to Docbook. Don't drop the link text in HTML. Don't downcase the name in Markdown. Link to local wiki item instead of a silly error in rST. Closes [issue#2028]. --- src/moin/converters/_tests/test_rst_in.py | 47 +++++++++++++++-------- src/moin/converters/docbook_in.py | 6 ++- src/moin/converters/html_in.py | 22 +++-------- src/moin/converters/markdown_in.py | 8 +--- src/moin/converters/rst_in.py | 28 ++++++-------- 5 files changed, 54 insertions(+), 57 deletions(-) diff --git a/src/moin/converters/_tests/test_rst_in.py b/src/moin/converters/_tests/test_rst_in.py index 0c1c4ec0c..b4d1ba813 100644 --- a/src/moin/converters/_tests/test_rst_in.py +++ b/src/moin/converters/_tests/test_rst_in.py @@ -54,10 +54,6 @@ def setup_class(self): ".

", ), ("a _`Link`", '

a Link

'), - ( - "`Text `_", - '

Text

', - ), ( "Text\n\n~~~~~\n\nTest", '

Text

Test

', @@ -317,13 +313,7 @@ def test_field_list(self, input, output): '

Abra

Abra example arba

', ), ( - """ -Abra example_ arba - -.. _example: -.. _alias: - -text""", + "Abra example_ arba\n\n.. _example:\n.. _alias:\n\ntext", '

Abra example arba

text

', ), ( # A reference_ with no matching target links to a local Wiki item. @@ -336,15 +326,18 @@ def test_field_list(self, input, output): ), ( "`Whitespace is\nnormalized\xA0& CÄSE is Kept.`_", - '

Whitespace is\nnormalized\xA0& CÄSE is Kept.

', + '

' + "Whitespace is\nnormalized\xA0& CÄSE is Kept.

", ), ( # in rST, reference-name matching is case insensitive: "Chapter 1\n===============\n\nA reference to `chapter 1`_.\n", - 'Chapter 1

A reference to chapter 1.

', + 'Chapter 1' + '

A reference to chapter 1.

', ), ( # check handling of non-ASCII chars: "τίτλος\n^^^^^^\n\nA reference to `τίτλος`_.\n", - 'τίτλος

A reference to τίτλος.

', + 'τίτλος' + '

A reference to τίτλος.

', ), ( "§ With % strange & siLLY \n" @@ -352,16 +345,32 @@ def test_field_list(self, input, output): "Reference to `§ With % strange\n" "& siLLY \\<title>`_.\n", '<page><body><h outline-level="1">§ With % strange & siLLY <title></h>' - '<p>Reference to <a xlink:href="wiki.local:#A.2BAKc_With_.25_strange_.26_siLLY_.3Ctitle.3E">§ With % strange\n' + '<p>Reference to <a xlink:href="wiki.local:#A.2BAKc_With_.25_strange_.26_siLLY_.3Ctitle.3E">' + "§ With % strange\n" "& siLLY <title></a>.</p></body></page>", ), ( "http://www.python.org/", '<page><body><p><a xlink:href="http://www.python.org/">http://www.python.org/</a></p></body></page>', ), - ("http:Home", '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>'), - ("`Home <http:Home>`_", '<page><body><p><a xlink:href="wiki.local:Home">Home</a></p></body></page>'), + ( # legacy syntax for Wiki-internal links (use URI references without scheme instead) + "http:Home", + '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>', + ), + ("`<http:Home>`__", '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>'), ( + r"`<https:Home:\ alone>`__", + '<page><body><p><a xlink:href="wiki.local:Home:%20alone">https:Home: alone</a></p></body></page>', + ), + ( # no URI scheme: resolve as wiki-internal link + "`<Home>`__", + '<page><body><p><a xlink:href="wiki.local:Home">Home</a></p></body></page>', + ), + ( + r"`<Home:\ alone>`__", + '<page><body><p><a xlink:href="wiki.local:Home:%20alone">Home: alone</a></p></body></page>', + ), + ( # rST recognizes e-mail addresses "mailto:me@moin.com", '<page><body><p><a xlink:href="mailto:me@moin.com">mailto:me@moin.com</a></p></body></page>', ), @@ -373,6 +382,10 @@ def test_field_list(self, input, output): "`Write to me`_ with your questions.\n\n.. 
_Write to me: jdoe@example.com", '<page><body><p><a xlink:href="mailto:jdoe@example.com">Write to me</a> with your questions.</p></body></page>', ), + ( # URI schemes not on the whitelist are interpreted as local wiki item names + "`Text <javascript:alert('xss')>`_", + "<page><body><p><a xlink:href=\"wiki.local:javascript:alert%28'xss'%29\">Text</a></p></body></page>", + ), ] @pytest.mark.usefixtures("_app_ctx") diff --git a/src/moin/converters/docbook_in.py b/src/moin/converters/docbook_in.py index 310052a88..a1ce5aee0 100644 --- a/src/moin/converters/docbook_in.py +++ b/src/moin/converters/docbook_in.py @@ -24,6 +24,7 @@ # in case converters become an independent package flaskg = None +from moin.constants.misc import URI_SCHEMES from moin.utils.iri import Iri from moin.utils.mime import Type, type_moin_document from moin.utils.tree import moin_page, xlink, docbook, xml, html, xinclude @@ -862,8 +863,9 @@ def visit_docbook_link(self, element, depth): if linkend: href = "".join(["#", linkend]) iri = Iri(href) - if iri.scheme is None: - iri.scheme = "wiki.local" + # ensure a safe scheme, fall back to wiki-internal reference: + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + href) attrib[xlink.href] = iri return self.new_copy(moin_page.a, element, depth, attrib) diff --git a/src/moin/converters/html_in.py b/src/moin/converters/html_in.py index 2eb473bc4..04d218adf 100644 --- a/src/moin/converters/html_in.py +++ b/src/moin/converters/html_in.py @@ -18,13 +18,14 @@ from markupsafe import escape +from moin.constants.misc import URI_SCHEMES from moin.i18n import _ from moin.utils.iri import Iri from moin.utils.tree import html, moin_page, xlink, xml from moin.utils.mime import Type, type_moin_document from . 
import default_registry -from ._util import allowed_uri_scheme, decode_data, normalize_split_text +from ._util import decode_data, normalize_split_text from moin import log @@ -425,21 +426,10 @@ def visit_xhtml_a(self, element): href = element.get(html.href) if self.base_url: href = "".join([self.base_url, href]) - if allowed_uri_scheme(href): - iri = Iri(href) - else: - # URI schemes that are not in the whitelist like: """<a href="javascript:alert('hi')">Test</a>""" - # are converted to: """javascript:alert('hi')""" - # TODO: don't drop the link text, convert to - # - # Test >javascript:alert('hi')< - # - # orr treat the href as wiki-local URI-reference: - # - # href="wiki.local:javascript:alert('hi') - return href - if iri.scheme is None: - iri.scheme = "wiki.local" + iri = Iri(href) + # ensure a safe scheme, fall back to wiki-internal reference + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + href) attrib[key] = iri return self.new_copy(moin_page.a, element, attrib) diff --git a/src/moin/converters/markdown_in.py b/src/moin/converters/markdown_in.py index 4619bb0f1..88dd84494 100644 --- a/src/moin/converters/markdown_in.py +++ b/src/moin/converters/markdown_in.py @@ -372,13 +372,9 @@ def visit_a(self, element): attrib[html.title_] = element.attrib.get("title") href = postproc_text(self.markdown, element.attrib.get("href")) iri = Iri(href) - # iri has authority, fragment, path, query, scheme = none,none,path,none - # Check, if the IRI scheme is whitelisted, - # if not, handle the IRI as wiki-local reference: + # ensure a safe scheme, fall back to wiki-internal reference if iri.scheme not in URI_SCHEMES: - if iri.scheme: - iri.path = f"{iri.scheme}:{iri.path}" - iri.scheme = "wiki.local" + iri = Iri("wiki.local:" + href) attrib[key] = iri return self.new_copy(moin_page.a, element, attrib) diff --git a/src/moin/converters/rst_in.py b/src/moin/converters/rst_in.py index ee180545f..b4661723c 100644 --- a/src/moin/converters/rst_in.py +++ 
b/src/moin/converters/rst_in.py @@ -28,13 +28,14 @@ # in case converters become an independent package flaskg = None +from moin.constants.misc import URI_SCHEMES from moin.utils.iri import Iri from moin.utils.tree import html, moin_page, xlink, xinclude from moin.utils.mime import Type, type_moin_document from moin.wikiutil import anchor_name_from_text from . import default_registry -from ._util import allowed_uri_scheme, decode_data, normalize_split_text +from ._util import decode_data, normalize_split_text from moin import log @@ -638,11 +639,6 @@ def visit_reference(self, node): self.close_moin_page_node() return - if not allowed_uri_scheme(refuri): - # TODO: prepend "wiki.local" as in "moin_in"? - self.visit_error(node) - return - if refuri == "" and "refid" in node: # internal cross-links refid = node["refid"] @@ -652,16 +648,16 @@ def visit_reference(self, node): if isinstance(target_node, nodes.section): title = target_node[0] refid = anchor_name_from_text(title.astext()) - refuri = Iri(scheme="wiki.local", fragment=refid) - - if isinstance(refuri, str) and refuri.startswith("http"): - if "://" not in refuri: - refuri = refuri.split(":")[1] - iri = Iri(refuri) - if iri.scheme is None: - iri.scheme = "wiki.local" - refuri = iri - self.open_moin_page_node(moin_page.a(attrib={xlink.href: refuri})) + iri = Iri(scheme="wiki.local", fragment=refid) + elif refuri.startswith("http") and "://" not in refuri: + # convert links like "http:Home" to wiki-internal references + iri = Iri("wiki.local:" + refuri.split(":", maxsplit=1)[1]) + else: + # ensure a safe scheme, fall back to wiki-internal reference + iri = Iri(refuri) + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + refuri) + self.open_moin_page_node(moin_page.a(attrib={xlink.href: iri})) def depart_reference(self, node): self.close_moin_page_node() From ec47b707227ace94dd46721f7a4be30ee995334b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnter=20Milde?= <milde@users.sf.net> Date: Tue, 9 Dec 2025 
12:33:49 +0100 Subject: [PATCH 2/2] Adapt test of "html_in"-converter to new handling of "off-list" URI schemes. Use triple quotes for test string also in test_rst_in.py. --- src/moin/converters/_tests/test_html_in.py | 7 ++++--- src/moin/converters/_tests/test_rst_in.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/moin/converters/_tests/test_html_in.py b/src/moin/converters/_tests/test_html_in.py index 65586e7fa..11c544f69 100644 --- a/src/moin/converters/_tests/test_html_in.py +++ b/src/moin/converters/_tests/test_html_in.py @@ -241,11 +241,12 @@ def test_span_html_element(self, input, xpath): # <page><body><div><p><a xlink:href="http://www.base-url.com/myPage.html">Test</a></p></div></body></page> '/page/body/div/p/a[@xlink:href="http://www.base-url.com/myPage.html"]', ), - # verify invalid or forbidden uri schemes are removed + # only approved URI schemes are used in a "href" + # (others are handled as part of a local item name): ( """<html><p><a href="javascript:alert('hi')">Test</a></p></html>""", - # <page><body><p>javascript:alert('hi')</p></body></page> - """/page/body/p[text()="javascript:alert('hi')"]""", + # <page><body><p><a xlink:href="wiki.local:javascript:alert%28'hi'%29">Test</a></p></body></page> + """/page/body/p/a[text()="Test"][@xlink:href="wiki.local:javascript:alert%28'hi'%29"]""", ), ] diff --git a/src/moin/converters/_tests/test_rst_in.py b/src/moin/converters/_tests/test_rst_in.py index b4d1ba813..5531852ce 100644 --- a/src/moin/converters/_tests/test_rst_in.py +++ b/src/moin/converters/_tests/test_rst_in.py @@ -384,7 +384,7 @@ def test_field_list(self, input, output): ), ( # URI schemes not on the whitelist are interpreted as local wiki item names "`Text <javascript:alert('xss')>`_", - "<page><body><p><a xlink:href=\"wiki.local:javascript:alert%28'xss'%29\">Text</a></p></body></page>", + """<page><body><p><a xlink:href="wiki.local:javascript:alert%28'xss'%29">Text</a></p></body></page>""", ), ]