Skip to content

Commit 70803f4

Browse files
authored
Merge pull request #2644 from markotoplak/reader_select
[ENH] owfile: allow multiple readers with same extension
2 parents 4d5e90c + e6b3cbb commit 70803f4

File tree

5 files changed

+244
-51
lines changed

5 files changed

+244
-51
lines changed

Orange/data/io.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from ast import literal_eval
1111
from collections import OrderedDict, Counter
1212
from functools import lru_cache
13+
from importlib import import_module
1314
from itertools import chain, repeat
1415
from math import isnan
1516
from numbers import Number
@@ -293,11 +294,18 @@ def _ext_to_attr_if_attr2(cls, attr, attr2):
293294
"""
294295
Return ``{ext: `attr`, ...}`` dict if ``cls`` has `attr2`.
295296
If `attr` is '', return ``{ext: cls, ...}`` instead.
297+
298+
If there are multiple formats for an extension, return a format
299+
with the lowest priority.
296300
"""
297-
return OrderedDict((ext, getattr(cls, attr, cls))
298-
for cls in cls.registry.values()
299-
if hasattr(cls, attr2)
300-
for ext in getattr(cls, 'EXTENSIONS', []))
301+
formats = OrderedDict()
302+
for format in sorted(cls.registry.values(), key=lambda x: x.PRIORITY):
303+
if not hasattr(format, attr2):
304+
continue
305+
for ext in getattr(format, 'EXTENSIONS', []):
306+
# Only adds if not yet registered
307+
formats.setdefault(ext, getattr(format, attr, format))
308+
return formats
301309

302310
@property
303311
def names(cls):
@@ -343,7 +351,9 @@ def write_file(cls, filename, data):
343351
iterable (list (rows) of lists of values (cols)).
344352
"""
345353

346-
PRIORITY = 10000 # Sort order in OWSave widget combo box, lower is better
354+
# Priority when multiple formats support the same extension. Also
355+
# the sort order in file open/save combo boxes. Lower is better.
356+
PRIORITY = 10000
347357

348358
def __init__(self, filename):
349359
"""
@@ -762,6 +772,17 @@ def write_data(cls, write, data):
762772
val
763773
for var, val in zip(vars, flatten(row))])
764774

775+
@classmethod
def qualified_name(cls):
    """Return the dotted ``module.ClassName`` string identifying this class."""
    return ".".join((cls.__module__, cls.__name__))
778+
779+
780+
def class_from_qualified_name(format_name):
    """
    Return the file format class identified by a qualified name.

    `format_name` is a dotted ``package.module.ClassName`` string. Everything
    before the last dot is imported as a module, and the part after the last
    dot is looked up as an attribute of that module.

    Raises ``ValueError`` if `format_name` has no module part,
    ``ImportError`` if the module cannot be imported and ``AttributeError``
    if the module does not define the requested name.
    """
    module_name, _, class_name = format_name.rpartition(".")
    if not module_name:
        # import_module("") would only report a cryptic "Empty module name";
        # fail with the same exception type but name the offending input.
        raise ValueError(
            "Expected a qualified 'module.ClassName' name, got {!r}"
            .format(format_name))
    return getattr(import_module(module_name), class_name)
785+
765786

766787
class CSVReader(FileFormat):
767788
"""Reader for comma separated files"""

Orange/tests/test_io.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader
1010
from Orange.data.table import get_sample_datasets_dir
1111

12+
1213
class WildcardReader(FileFormat):
1314
EXTENSIONS = ('.wild', '.wild[0-9]')
1415
DESCRIPTION = "Dummy reader for testing extensions"
@@ -35,6 +36,30 @@ def test_wildcard_extension(self):
3536
FileFormat.get_reader("t.wild2a")
3637

3738

39+
class SameExtension(FileFormat):
    """Dummy reader; base for several readers that share one extension."""
    DESCRIPTION = "Same extension, different priority"
    EXTENSIONS = ('.same_extension',)
    PRIORITY = 100

    def read(self):
        # Nothing is ever read; only reader selection is exercised.
        return None
46+
47+
48+
class SameExtensionPreferred(SameExtension):
    """Same extension as the base class, with a smaller PRIORITY value."""
    PRIORITY = 90
50+
51+
52+
class SameExtensionL(SameExtension):
    """Same extension as the base class, with a larger PRIORITY value."""
    PRIORITY = 110
54+
55+
56+
class TestMultipleSameExtension(unittest.TestCase):
    # Several readers register '.same_extension'; the one with the smallest
    # PRIORITY value is expected to be picked for a matching file name.

    def test_find_reader(self):
        chosen = FileFormat.get_reader("some.same_extension")
        self.assertIsInstance(chosen, SameExtensionPreferred)
61+
62+
3863
class TestLocate(unittest.TestCase):
3964

4065
def test_locate_sample_datasets(self):
@@ -49,7 +74,6 @@ def test_locate_sample_datasets(self):
4974
search_dirs=[get_sample_datasets_dir()])
5075
self.assertEqual(os.path.basename(iris), "iris.tab")
5176

52-
5377
def test_locate_wildcard_extension(self):
5478
tempdir = tempfile.mkdtemp()
5579
with self.assertRaises(OSError):

Orange/widgets/data/owfile.py

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@
55

66
import numpy as np
77
from AnyQt.QtWidgets import \
8-
QStyle, QComboBox, QMessageBox, QFileDialog, QGridLayout, QLabel, \
8+
QStyle, QComboBox, QMessageBox, QGridLayout, QLabel, \
99
QLineEdit, QSizePolicy as Policy
1010
from AnyQt.QtCore import Qt, QTimer, QSize
1111

1212
from Orange.canvas.gui.utils import OSX_NSURL_toLocalFile
13-
from Orange.data import StringVariable
1413
from Orange.data.table import Table, get_sample_datasets_dir
15-
from Orange.data.io import FileFormat, UrlReader
14+
from Orange.data.io import FileFormat, UrlReader, class_from_qualified_name
1615
from Orange.widgets import widget, gui
1716
from Orange.widgets.settings import Setting, ContextSetting, \
1817
PerfectDomainContextHandler, SettingProvider
1918
from Orange.widgets.utils.domaineditor import DomainEditor
2019
from Orange.widgets.utils.itemmodels import PyListModel
21-
from Orange.widgets.utils.filedialogs import RecentPathsWComboMixin, dialog_formats
20+
from Orange.widgets.utils.filedialogs import RecentPathsWComboMixin, \
21+
open_filename_dialog
2222
from Orange.widgets.widget import Output
2323

2424
# Backward compatibility: class RecentPath used to be defined in this module,
@@ -40,7 +40,7 @@ def add_origin(examples, filename):
4040
return
4141
vars = examples.domain.variables + examples.domain.metas
4242
strings = [var for var in vars if var.is_string]
43-
dir_name, basename = os.path.split(filename)
43+
dir_name, _ = os.path.split(filename)
4444
for var in strings:
4545
if "type" in var.attributes and "origin" not in var.attributes:
4646
var.attributes["origin"] = dir_name
@@ -112,9 +112,13 @@ class Outputs:
112112
class Warning(widget.OWWidget.Warning):
    # Non-fatal messages shown by the widget.
    file_too_big = widget.Msg("The file is too large to load automatically."
                              " Press Reload to load.")
    # The placeholder receives the text of the last warning recorded while
    # the reader was reading the file.
    load_warning = widget.Msg("Read warning:\n{}")
115116

116117
class Error(widget.OWWidget.Error):
    # Each message corresponds to one failing stage of loading a file.

    # The last used path does not exist.
    file_not_found = widget.Msg("File not found.")
    # Obtaining a reader for the file raised an exception.
    missing_reader = widget.Msg("Missing reader.")
    # Updating the sheet combo box raised an exception.
    sheet_error = widget.Msg("Error listing available sheets.")
    # The reader's read() raised; the placeholder receives str(exception).
    unknown = widget.Msg("Read error:\n{}")
118122

119123
def __init__(self):
120124
super().__init__()
@@ -264,58 +268,60 @@ def browse_file(self, in_demos=False):
264268
else:
265269
start_file = self.last_path() or os.path.expanduser("~/")
266270

267-
filename, _ = QFileDialog.getOpenFileName(
268-
self, 'Open Orange Data File', start_file, dialog_formats())
271+
readers = [f for f in FileFormat.formats
272+
if getattr(f, 'read', None) and getattr(f, "EXTENSIONS", None)]
273+
filename, reader, _ = open_filename_dialog(start_file, None, readers)
269274
if not filename:
270275
return
271276
self.add_path(filename)
277+
if reader is not None:
278+
self.recent_paths[0].file_format = reader.qualified_name()
279+
272280
self.source = self.LOCAL_FILE
273281
self.load_data()
274282

275283
# Open a file, create data from it and send it over the data channel
276284
def load_data(self):
277285
# We need to catch any exception type since anything can happen in
278286
# file readers
279-
# pylint: disable=broad-except
280287
self.closeContext()
281288
self.domain_editor.set_domain(None)
282289
self.apply_button.setEnabled(False)
283290
self.clear_messages()
284291
self.set_file_list()
285-
if self.last_path() and not os.path.exists(self.last_path()):
286-
self.Error.file_not_found()
292+
293+
error = self._try_load()
294+
if error:
295+
error()
296+
self.data = None
297+
self.sheet_box.hide()
287298
self.Outputs.data.send(None)
288299
self.info.setText("No data.")
289-
return
290300

291-
error = None
301+
def _try_load(self):
302+
# pylint: disable=broad-except
303+
if self.last_path() and not os.path.exists(self.last_path()):
304+
return self.Error.file_not_found
305+
292306
try:
293307
self.reader = self._get_reader()
294-
if self.reader is None:
295-
self.data = None
296-
self.Outputs.data.send(None)
297-
self.info.setText("No data.")
298-
self.sheet_box.hide()
299-
return
300-
except Exception as ex:
301-
error = ex
308+
assert self.reader is not None
309+
except Exception:
310+
return self.Error.missing_reader
302311

303-
if not error:
312+
try:
304313
self._update_sheet_combo()
305-
with catch_warnings(record=True) as warnings:
306-
try:
307-
data = self.reader.read()
308-
except Exception as ex:
309-
log.exception(ex)
310-
error = ex
311-
self.warning(warnings[-1].message.args[0] if warnings else '')
314+
except Exception:
315+
return self.Error.sheet_error
312316

313-
if error:
314-
self.data = None
315-
self.Outputs.data.send(None)
316-
self.info.setText("An error occurred:\n{}".format(error))
317-
self.sheet_box.hide()
318-
return
317+
with catch_warnings(record=True) as warnings:
318+
try:
319+
data = self.reader.read()
320+
except Exception as ex:
321+
log.exception(ex)
322+
return lambda x=ex: self.Error.unknown(str(x))
323+
if warnings:
324+
self.Warning.load_warning(warnings[-1].message.args[0])
319325

320326
self.info.setText(self._describe(data))
321327

@@ -333,7 +339,13 @@ def _get_reader(self):
333339
FileFormat
334340
"""
335341
if self.source == self.LOCAL_FILE:
336-
reader = FileFormat.get_reader(self.last_path())
342+
path = self.last_path()
343+
if self.recent_paths and self.recent_paths[0].file_format:
344+
qname = self.recent_paths[0].file_format
345+
reader_class = class_from_qualified_name(qname)
346+
reader = reader_class(path)
347+
else:
348+
reader = FileFormat.get_reader(path)
337349
if self.recent_paths and self.recent_paths[0].sheet:
338350
reader.select_sheet(self.recent_paths[0].sheet)
339351
return reader

Orange/widgets/data/tests/test_owfile.py

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from unittest.mock import Mock, patch
55
import pickle
66
import tempfile
7-
7+
import warnings
88

99
import numpy as np
1010
import scipy.sparse as sp
@@ -16,9 +16,10 @@
1616
import Orange
1717
from Orange.data import FileFormat, dataset_dirs, StringVariable, Table, \
1818
Domain, DiscreteVariable
19+
from Orange.data.io import TabReader
1920
from Orange.tests import named_file
2021
from Orange.widgets.data.owfile import OWFile
21-
from Orange.widgets.utils.filedialogs import dialog_formats
22+
from Orange.widgets.utils.filedialogs import dialog_formats, format_filter, RecentPath
2223
from Orange.widgets.tests.base import WidgetTest
2324
from Orange.widgets.utils.domaineditor import ComboDelegate, VarTypeDelegate, VarTableModel
2425

@@ -33,6 +34,35 @@ def read(self):
3334
pass
3435

3536

37+
class FailedSheetsFormat(FileFormat):
    """Dummy format whose sheet listing always raises."""
    DESCRIPTION = "Make a sheet function that fails"
    EXTENSIONS = ('.failed_sheet',)

    def read(self):
        return None

    def sheets(self):
        # Deliberately fail so the widget's sheet error path is exercised.
        raise Exception("Not working")
46+
47+
48+
class WithWarnings(FileFormat):
    """Dummy format that emits a warning and then returns the iris table."""
    DESCRIPTION = "Warning"
    EXTENSIONS = ('.with_warning',)

    def read(self):
        # Warn before loading, so the warning is issued from within read().
        warnings.warn("Some warning")
        return Orange.data.Table("iris")
55+
56+
57+
class MyCustomTabReader(FileFormat):
    """Dummy '.tab' reader that ignores the file and returns the iris table."""
    DESCRIPTION = "Always return iris"
    EXTENSIONS = ('.tab',)
    # Large value, so this reader is not the default choice for '.tab' files
    # (lower PRIORITY values are preferred).
    PRIORITY = 999999

    def read(self):
        return Orange.data.Table("iris")
64+
65+
3666
class TestOWFile(WidgetTest):
3767
# Attribute used to store event data so it does not get garbage
3868
# collected before event is processed.
@@ -209,6 +239,69 @@ def test_check_datetime_disabled(self):
209239
vartype_delegate.setEditorData(combo, idx(i))
210240
self.assertEqual(combo.count(), counts[i])
211241

242+
def test_reader_custom_tab(self):
243+
with named_file("", suffix=".tab") as fn:
244+
qname = MyCustomTabReader.qualified_name()
245+
reader = RecentPath(fn, None, None, file_format=qname)
246+
self.widget = self.create_widget(OWFile,
247+
stored_settings={"recent_paths": [reader]})
248+
self.widget.load_data()
249+
self.assertFalse(self.widget.Error.missing_reader.is_shown())
250+
outdata = self.get_output(self.widget.Outputs.data)
251+
self.assertEqual(len(outdata), 150) # loaded iris
252+
253+
def test_no_reader_extension(self):
254+
with named_file("", suffix=".xyz_unknown") as fn:
255+
no_reader = RecentPath(fn, None, None)
256+
self.widget = self.create_widget(OWFile,
257+
stored_settings={"recent_paths": [no_reader]})
258+
self.widget.load_data()
259+
self.assertTrue(self.widget.Error.missing_reader.is_shown())
260+
261+
def test_fail_sheets(self):
262+
with named_file("", suffix=".failed_sheet") as fn:
263+
self.open_dataset(fn)
264+
self.assertTrue(self.widget.Error.sheet_error.is_shown())
265+
266+
def test_with_warnings(self):
267+
with named_file("", suffix=".with_warning") as fn:
268+
self.open_dataset(fn)
269+
self.assertTrue(self.widget.Warning.load_warning.is_shown())
270+
271+
def test_fail(self):
272+
with named_file("name\nc\n\nstring", suffix=".tab") as fn:
273+
self.open_dataset(fn)
274+
self.assertTrue(self.widget.Error.unknown.is_shown())
275+
276+
def test_read_format(self):
277+
iris = Table("iris")
278+
279+
def open_iris_with_no_specific_format(a, b, c, filters, e):
280+
return iris.__file__, filters.split(";;")[0]
281+
282+
with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName",
283+
open_iris_with_no_specific_format):
284+
self.widget.browse_file()
285+
286+
self.assertIsNone(self.widget.recent_paths[0].file_format)
287+
288+
def open_iris_with_tab(a, b, c, filters, e):
289+
return iris.__file__, format_filter(TabReader)
290+
291+
with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName",
292+
open_iris_with_tab):
293+
self.widget.browse_file()
294+
295+
self.assertEqual(self.widget.recent_paths[0].file_format, "Orange.data.io.TabReader")
296+
297+
def test_no_specified_reader(self):
298+
with named_file("", suffix=".tab") as fn:
299+
no_class = RecentPath(fn, None, None, file_format="not.a.file.reader.class")
300+
self.widget = self.create_widget(OWFile,
301+
stored_settings={"recent_paths": [no_class]})
302+
self.widget.load_data()
303+
self.assertTrue(self.widget.Error.missing_reader.is_shown())
304+
212305
def test_domain_edit_on_sparse_data(self):
213306
iris = Table("iris")
214307
iris.X = sp.csr_matrix(iris.X)

0 commit comments

Comments
 (0)