Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions orangecontrib/text/import_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pathlib
import re
import yaml
from urllib.parse import quote
from urllib.parse import quote, unquote
from requests.exceptions import ConnectionError

from collections import namedtuple
Expand Down Expand Up @@ -185,10 +185,13 @@ class UrlReader(Reader, CoreUrlReader):

# Initialise both base classes of UrlReader (Reader and CoreUrlReader)
# explicitly.
# NOTE(review): this is a diff rendering without +/- markers; presumably the
# first Reader.__init__ call below is the removed line and the second one is
# its replacement (passing the original path instead of self.filename) -
# confirm against the commit.
def __init__(self, path, *args):
CoreUrlReader.__init__(self, path)
Reader.__init__(self, self.filename, *args)
Reader.__init__(self, path, *args)

def read_file(self):
self.filename = quote(self.filename, safe="/:")
# unquote prevents double quoting when the filename is already quoted;
# when it is not quoted, unquote leaves the URL unchanged. This is needed
# because Orange's UrlReader quotes URLs in version 3.29 but not in older
# versions.
self.filename = quote(unquote(self.filename), safe="/:")
self.filename = self._trim(self._resolve_redirects(self.filename))
with contextlib.closing(self.urlopen(self.filename)) as response:
name = self._suggest_filename(
Expand Down
14 changes: 13 additions & 1 deletion orangecontrib/text/tests/test_import_documents.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import unittest
from unittest.mock import patch

import numpy as np
import pandas as pd

from orangecontrib.text.import_documents import ImportDocuments, UrlReader, \
TxtReader, TextData
from pkg_resources import get_distribution


class TestUrlReader(unittest.TestCase):
Expand Down Expand Up @@ -51,6 +51,18 @@ def test_name_text_data(self):
self.assertEqual(text_data.category, "data")
self.assertEqual(text_data.content, "text")

def test_remove_quoting(self):
    """
    Reminder to drop the text add-on's own URL quoting.

    Orange quotes URLs itself, so the quoting in the text add-on becomes
    unnecessary once the minimal required Orange version is raised to
    3.29.1. When this test starts to fail, remove the test itself and
    lines 191 - 194 in import_documents.py.
    """
    text_addon = get_distribution('orange3-text')
    # Pick the add-on's requirement entry for Orange itself.
    orange_requirement = next(
        req for req in text_addon.requires() if req.name == "Orange3"
    )
    # Version part of the first specifier; treated here as the minimal
    # supported Orange version.
    version_string = orange_requirement.specs[0][1]
    minimal_version = tuple(int(part) for part in version_string.split("."))
    self.assertLess(minimal_version, (3, 29, 1))


class TestImportDocuments(unittest.TestCase):
def test_scan_url(self):
Expand Down