biolab · ales-erjavec · Jan 12, 2022 · Jan 14, 2022 · VesnaT · Jan 14, 2022
diff --git a/Orange/data/io.py b/Orange/data/io.py
@@ -13,8 +13,7 @@
 
 from os import path, remove
 from tempfile import NamedTemporaryFile
-from urllib.parse import urlparse, urlsplit, urlunsplit, \
-    unquote as urlunquote, quote
+from urllib.parse import urlparse, urlsplit, urlunsplit, unquote as urlunquote
 from urllib.request import urlopen, Request
 from pathlib import Path
 
@@ -406,7 +405,6 @@ def __init__(self, filename):
         filename = filename.strip()
         if not urlparse(filename).scheme:
             filename = 'http://' + filename
-        filename = quote(filename, safe="/:")
         super().__init__(filename)
 
     @staticmethod

diff --git a/Orange/tests/test_url_reader.py b/Orange/tests/test_url_reader.py
@@ -17,14 +17,3 @@ def test_zipped(self):
             "http://datasets.biolab.si/core/philadelphia-crime.csv.xz"
         ).read()
         self.assertEqual(9666, len(data))
-
-    def test_special_characters(self):
-        # TO-DO - replace this file with a more appropriate one (e.g. .csv)
-        #  and change the assertion accordingly
-        path = "http://file.biolab.si/text-semantics/data/elektrotehniski-" \
-               "vestnik-clanki/detektiranje-utrdb-v-šahu-.txt"
-        self.assertRaises(OSError, UrlReader(path).read)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/Orange/widgets/data/owfile.py b/Orange/widgets/data/owfile.py
@@ -470,6 +470,10 @@ def _get_reader(self) -> FileFormat:
             return reader
         else:
             url = self.url_combo.currentText().strip()
+            # tolerant parse, also prevent parsing as local file
+            parsed = QUrl.fromUserInput(url, os.devnull)
+            if parsed.isValid():
+                url = bytes(parsed.toEncoded()).decode("ascii")
             return UrlReader(url)
 
     def _update_sheet_combo(self):

diff --git a/Orange/widgets/data/tests/test_owfile.py b/Orange/widgets/data/tests/test_owfile.py
@@ -577,6 +577,21 @@ def test_url_no_scheme(self):
 
         mock_urlreader.assert_called_once_with('http://' + url)
 
+    def test_url_encode(self):
+        """URL text from the combo must reach UrlReader encoded, never re-encoded."""
+        def check(entered, encoded):
+            reader_cls = Mock()
+            with patch('Orange.widgets.data.owfile.UrlReader', new=reader_cls):
+                self.widget.url_combo.insertItem(0, entered)
+                self.widget.url_combo.setCurrentIndex(0)
+                self.widget.url_combo.activated.emit(0)
+            reader_cls.assert_called_once_with(encoded)
+        check("https://example.com/space space#f=f", "https://example.com/space%20space#f=f")
+        check("https://example.com/space%20space#f=f", "https://example.com/space%20space#f=f")
+        check("https://š.si/š#f=1", "https://xn--pga.si/%C5%A1#f=1")
+        check("https://xn--pga.si/%C5%A1#f=1", "https://xn--pga.si/%C5%A1#f=1")
+        check("https://example.com/a?q=1#f=1", "https://example.com/a?q=1#f=1")
+
     def test_adds_origin(self):
         self.open_dataset("origin1/images")
         data1 = self.get_output(self.widget.Outputs.data)