diff --git a/Orange/data/io.py b/Orange/data/io.py index 1a952bcd0cf..8510678ddcf 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -13,8 +13,7 @@ from os import path, remove from tempfile import NamedTemporaryFile -from urllib.parse import urlparse, urlsplit, urlunsplit, \ - unquote as urlunquote, quote +from urllib.parse import urlparse, urlsplit, urlunsplit, unquote as urlunquote from urllib.request import urlopen, Request from pathlib import Path @@ -406,7 +405,6 @@ def __init__(self, filename): filename = filename.strip() if not urlparse(filename).scheme: filename = 'http://' + filename - filename = quote(filename, safe="/:") super().__init__(filename) @staticmethod diff --git a/Orange/tests/test_url_reader.py b/Orange/tests/test_url_reader.py index c40bf26a2b5..87b3c1d73aa 100644 --- a/Orange/tests/test_url_reader.py +++ b/Orange/tests/test_url_reader.py @@ -17,14 +17,3 @@ def test_zipped(self): "http://datasets.biolab.si/core/philadelphia-crime.csv.xz" ).read() self.assertEqual(9666, len(data)) - - def test_special_characters(self): - # TO-DO - replace this file with a more appropriate one (e.g. .csv) - # and change the assertion accordingly - path = "http://file.biolab.si/text-semantics/data/elektrotehniski-" \ - "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt" - self.assertRaises(OSError, UrlReader(path).read) - - -if __name__ == "__main__": - unittest.main() diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py index 9c050a6e6d5..cd488f804e6 100644 --- a/Orange/widgets/data/owfile.py +++ b/Orange/widgets/data/owfile.py @@ -470,6 +470,10 @@ def _get_reader(self) -> FileFormat: return reader else: url = self.url_combo.currentText().strip() + # tolerant parse, also prevent parsing as local file + parsed = QUrl.fromUserInput(url, os.devnull) + if parsed.isValid(): + url = bytes(parsed.toEncoded()).decode("ascii") return UrlReader(url) def _update_sheet_combo(self): diff --git a/Orange/widgets/data/tests/test_owfile.py b/Orange/widgets/data/tests/test_owfile.py index 8a533470ed7..4f1f1d482f0 100644 --- a/Orange/widgets/data/tests/test_owfile.py +++ b/Orange/widgets/data/tests/test_owfile.py @@ -577,6 +577,21 @@ def test_url_no_scheme(self): mock_urlreader.assert_called_once_with('http://' + url) + def test_url_encode(self): + def test(text, expected): + mock_urlreader = Mock() + with patch('Orange.widgets.data.owfile.UrlReader', mock_urlreader): + self.widget.url_combo.insertItem(0, text) + self.widget.url_combo.setCurrentIndex(0) + self.widget.url_combo.activated.emit(0) + mock_urlreader.assert_called_once_with(expected) + + test("https://example.com/space space#f=f", "https://example.com/space%20space#f=f") + test("https://example.com/space%20space#f=f", "https://example.com/space%20space#f=f") + test("https://š.si/š#f=1", "https://xn--pga.si/%C5%A1#f=1") + test("https://xn--pga.si/%C5%A1#f=1", "https://xn--pga.si/%C5%A1#f=1") + test("https://example.com/a?q=1#f=1", "https://example.com/a?q=1#f=1") + def test_adds_origin(self): self.open_dataset("origin1/images") data1 = self.get_output(self.widget.Outputs.data)