diff --git a/src/moin/converters/_tests/test_html_in.py b/src/moin/converters/_tests/test_html_in.py index 65586e7fa..11c544f69 100644 --- a/src/moin/converters/_tests/test_html_in.py +++ b/src/moin/converters/_tests/test_html_in.py @@ -241,11 +241,12 @@ def test_span_html_element(self, input, xpath): #

Test

'/page/body/div/p/a[@xlink:href="http://www.base-url.com/myPage.html"]', ), - # verify invalid or forbidden uri schemes are removed + # only approved URI schemes are used in a "href" + # (others are handled as part of a local item name): ( """

Test

""", - #

javascript:alert('hi')

- """/page/body/p[text()="javascript:alert('hi')"]""", + #

Test

+ """/page/body/p/a[text()="Test"][@xlink:href="wiki.local:javascript:alert%28'hi'%29"]""", ), ] diff --git a/src/moin/converters/_tests/test_rst_in.py b/src/moin/converters/_tests/test_rst_in.py index 0c1c4ec0c..5531852ce 100644 --- a/src/moin/converters/_tests/test_rst_in.py +++ b/src/moin/converters/_tests/test_rst_in.py @@ -54,10 +54,6 @@ def setup_class(self): ".

", ), ("a _`Link`", '

a Link

'), - ( - "`Text `_", - '

Text

', - ), ( "Text\n\n~~~~~\n\nTest", '

Text

Test

', @@ -317,13 +313,7 @@ def test_field_list(self, input, output): '

Abra

Abra example arba

', ), ( - """ -Abra example_ arba - -.. _example: -.. _alias: - -text""", + "Abra example_ arba\n\n.. _example:\n.. _alias:\n\ntext", '

Abra example arba

text

', ), ( # A reference_ with no matching target links to a local Wiki item. @@ -336,15 +326,18 @@ def test_field_list(self, input, output): ), ( "`Whitespace is\nnormalized\xA0& CÄSE is Kept.`_", - '

Whitespace is\nnormalized\xA0& CÄSE is Kept.

', + '

' + "Whitespace is\nnormalized\xA0& CÄSE is Kept.

", ), ( # in rST, reference-name matching is case insensitive: "Chapter 1\n===============\n\nA reference to `chapter 1`_.\n", - 'Chapter 1

A reference to chapter 1.

', + 'Chapter 1' + '

A reference to chapter 1.

', ), ( # check handling of non-ASCII chars: "τίτλος\n^^^^^^\n\nA reference to `τίτλος`_.\n", - 'τίτλος

A reference to τίτλος.

', + 'τίτλος' + '

A reference to τίτλος.

', ), ( "§ With % strange & siLLY \n" @@ -352,16 +345,32 @@ def test_field_list(self, input, output): "Reference to `§ With % strange\n" "& siLLY \\<title>`_.\n", '<page><body><h outline-level="1">§ With % strange & siLLY <title></h>' - '<p>Reference to <a xlink:href="wiki.local:#A.2BAKc_With_.25_strange_.26_siLLY_.3Ctitle.3E">§ With % strange\n' + '<p>Reference to <a xlink:href="wiki.local:#A.2BAKc_With_.25_strange_.26_siLLY_.3Ctitle.3E">' + "§ With % strange\n" "& siLLY <title></a>.</p></body></page>", ), ( "http://www.python.org/", '<page><body><p><a xlink:href="http://www.python.org/">http://www.python.org/</a></p></body></page>', ), - ("http:Home", '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>'), - ("`Home <http:Home>`_", '<page><body><p><a xlink:href="wiki.local:Home">Home</a></p></body></page>'), + ( # legacy syntax for Wiki-internal links (use URI references without scheme instead) + "http:Home", + '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>', + ), + ("`<http:Home>`__", '<page><body><p><a xlink:href="wiki.local:Home">http:Home</a></p></body></page>'), ( + r"`<https:Home:\ alone>`__", + '<page><body><p><a xlink:href="wiki.local:Home:%20alone">https:Home: alone</a></p></body></page>', + ), + ( # no URI scheme: resolve as wiki-internal link + "`<Home>`__", + '<page><body><p><a xlink:href="wiki.local:Home">Home</a></p></body></page>', + ), + ( + r"`<Home:\ alone>`__", + '<page><body><p><a xlink:href="wiki.local:Home:%20alone">Home: alone</a></p></body></page>', + ), + ( # rST recognizes e-mail addresses "mailto:me@moin.com", '<page><body><p><a xlink:href="mailto:me@moin.com">mailto:me@moin.com</a></p></body></page>', ), @@ -373,6 +382,10 @@ def test_field_list(self, input, output): "`Write to me`_ with your questions.\n\n.. 
_Write to me: jdoe@example.com", '<page><body><p><a xlink:href="mailto:jdoe@example.com">Write to me</a> with your questions.</p></body></page>', ), + ( # URI schemes not on the whitelist are interpreted as local wiki item names + "`Text <javascript:alert('xss')>`_", + """<page><body><p><a xlink:href="wiki.local:javascript:alert%28'xss'%29">Text</a></p></body></page>""", + ), ] @pytest.mark.usefixtures("_app_ctx") diff --git a/src/moin/converters/docbook_in.py b/src/moin/converters/docbook_in.py index 310052a88..a1ce5aee0 100644 --- a/src/moin/converters/docbook_in.py +++ b/src/moin/converters/docbook_in.py @@ -24,6 +24,7 @@ # in case converters become an independent package flaskg = None +from moin.constants.misc import URI_SCHEMES from moin.utils.iri import Iri from moin.utils.mime import Type, type_moin_document from moin.utils.tree import moin_page, xlink, docbook, xml, html, xinclude @@ -862,8 +863,9 @@ def visit_docbook_link(self, element, depth): if linkend: href = "".join(["#", linkend]) iri = Iri(href) - if iri.scheme is None: - iri.scheme = "wiki.local" + # ensure a safe scheme, fall back to wiki-internal reference: + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + href) attrib[xlink.href] = iri return self.new_copy(moin_page.a, element, depth, attrib) diff --git a/src/moin/converters/html_in.py b/src/moin/converters/html_in.py index 2eb473bc4..04d218adf 100644 --- a/src/moin/converters/html_in.py +++ b/src/moin/converters/html_in.py @@ -18,13 +18,14 @@ from markupsafe import escape +from moin.constants.misc import URI_SCHEMES from moin.i18n import _ from moin.utils.iri import Iri from moin.utils.tree import html, moin_page, xlink, xml from moin.utils.mime import Type, type_moin_document from . 
import default_registry -from ._util import allowed_uri_scheme, decode_data, normalize_split_text +from ._util import decode_data, normalize_split_text from moin import log @@ -425,21 +426,10 @@ def visit_xhtml_a(self, element): href = element.get(html.href) if self.base_url: href = "".join([self.base_url, href]) - if allowed_uri_scheme(href): - iri = Iri(href) - else: - # URI schemes that are not in the whitelist like: """<a href="javascript:alert('hi')">Test</a>""" - # are converted to: """javascript:alert('hi')""" - # TODO: don't drop the link text, convert to - # - # Test >javascript:alert('hi')< - # - # orr treat the href as wiki-local URI-reference: - # - # href="wiki.local:javascript:alert('hi') - return href - if iri.scheme is None: - iri.scheme = "wiki.local" + iri = Iri(href) + # ensure a safe scheme, fall back to wiki-internal reference + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + href) attrib[key] = iri return self.new_copy(moin_page.a, element, attrib) diff --git a/src/moin/converters/markdown_in.py b/src/moin/converters/markdown_in.py index 4619bb0f1..88dd84494 100644 --- a/src/moin/converters/markdown_in.py +++ b/src/moin/converters/markdown_in.py @@ -372,13 +372,9 @@ def visit_a(self, element): attrib[html.title_] = element.attrib.get("title") href = postproc_text(self.markdown, element.attrib.get("href")) iri = Iri(href) - # iri has authority, fragment, path, query, scheme = none,none,path,none - # Check, if the IRI scheme is whitelisted, - # if not, handle the IRI as wiki-local reference: + # ensure a safe scheme, fall back to wiki-internal reference if iri.scheme not in URI_SCHEMES: - if iri.scheme: - iri.path = f"{iri.scheme}:{iri.path}" - iri.scheme = "wiki.local" + iri = Iri("wiki.local:" + href) attrib[key] = iri return self.new_copy(moin_page.a, element, attrib) diff --git a/src/moin/converters/rst_in.py b/src/moin/converters/rst_in.py index ee180545f..b4661723c 100644 --- a/src/moin/converters/rst_in.py +++ 
b/src/moin/converters/rst_in.py @@ -28,13 +28,14 @@ # in case converters become an independent package flaskg = None +from moin.constants.misc import URI_SCHEMES from moin.utils.iri import Iri from moin.utils.tree import html, moin_page, xlink, xinclude from moin.utils.mime import Type, type_moin_document from moin.wikiutil import anchor_name_from_text from . import default_registry -from ._util import allowed_uri_scheme, decode_data, normalize_split_text +from ._util import decode_data, normalize_split_text from moin import log @@ -638,11 +639,6 @@ def visit_reference(self, node): self.close_moin_page_node() return - if not allowed_uri_scheme(refuri): - # TODO: prepend "wiki.local" as in "moin_in"? - self.visit_error(node) - return - if refuri == "" and "refid" in node: # internal cross-links refid = node["refid"] @@ -652,16 +648,16 @@ def visit_reference(self, node): if isinstance(target_node, nodes.section): title = target_node[0] refid = anchor_name_from_text(title.astext()) - refuri = Iri(scheme="wiki.local", fragment=refid) - - if isinstance(refuri, str) and refuri.startswith("http"): - if "://" not in refuri: - refuri = refuri.split(":")[1] - iri = Iri(refuri) - if iri.scheme is None: - iri.scheme = "wiki.local" - refuri = iri - self.open_moin_page_node(moin_page.a(attrib={xlink.href: refuri})) + iri = Iri(scheme="wiki.local", fragment=refid) + elif refuri.startswith("http") and "://" not in refuri: + # convert links like "http:Home" to wiki-internal references + iri = Iri("wiki.local:" + refuri.split(":", maxsplit=1)[1]) + else: + # ensure a safe scheme, fall back to wiki-internal reference + iri = Iri(refuri) + if iri.scheme not in URI_SCHEMES: + iri = Iri("wiki.local:" + refuri) + self.open_moin_page_node(moin_page.a(attrib={xlink.href: iri})) def depart_reference(self, node): self.close_moin_page_node()