Skip to content

Commit f0fadfc

Browse files
authored
Merge pull request #661 from PrimozGodec/file-import
Import documents - prevent double quoting
2 parents 5461fa1 + f49d85d commit f0fadfc

File tree

2 files changed

+19
-4
lines changed

2 files changed

+19
-4
lines changed

orangecontrib/text/import_documents.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pathlib
66
import re
77
import yaml
8-
from urllib.parse import quote
8+
from urllib.parse import quote, unquote
99
from requests.exceptions import ConnectionError
1010

1111
from collections import namedtuple
@@ -185,10 +185,13 @@ class UrlReader(Reader, CoreUrlReader):
185185

186186
def __init__(self, path, *args):
187187
CoreUrlReader.__init__(self, path)
188-
Reader.__init__(self, self.filename, *args)
188+
Reader.__init__(self, path, *args)
189189

190190
def read_file(self):
191-
self.filename = quote(self.filename, safe="/:")
191+
# unquote prevents double quoting when the filename is already quoted;
192+
# when not quoted it doesn't change url - it is required since Orange's
193+
# UrlReader quotes URLs in version 3.29 but not in older versions
194+
self.filename = quote(unquote(self.filename), safe="/:")
192195
self.filename = self._trim(self._resolve_redirects(self.filename))
193196
with contextlib.closing(self.urlopen(self.filename)) as response:
194197
name = self._suggest_filename(

orangecontrib/text/tests/test_import_documents.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import unittest
2-
from unittest.mock import patch
32

43
import numpy as np
54
import pandas as pd
65

76
from orangecontrib.text.import_documents import ImportDocuments, UrlReader, \
87
TxtReader, TextData
8+
from pkg_resources import get_distribution
99

1010

1111
class TestUrlReader(unittest.TestCase):
@@ -51,6 +51,18 @@ def test_name_text_data(self):
5151
self.assertEqual(text_data.category, "data")
5252
self.assertEqual(text_data.content, "text")
5353

54+
def test_remove_quoting(self):
55+
"""
56+
Since URL quoting is implemented in Orange it can be removed from text
57+
addon when minimal version of Orange is increased to 3.29.1. When this
58+
test starts to fail, remove the test itself and lines 191 - 194 in
59+
import_documents.py
60+
"""
61+
distribution = get_distribution('orange3-text')
62+
orange_spec = next(x for x in distribution.requires() if x.name == "Orange3")
63+
orange_min_version = tuple(map(int, orange_spec.specs[0][1].split(".")))
64+
self.assertLess(orange_min_version, (3, 29, 1))
65+
5466

5567
class TestImportDocuments(unittest.TestCase):
5668
def test_scan_url(self):

0 commit comments

Comments
 (0)