Skip to content

Commit 5cd773b

Browse files
committed
File: Allow selecting a file with an arbitrary extension
1 parent f3d9e19 commit 5cd773b

File tree

3 files changed

+188
-34
lines changed

3 files changed

+188
-34
lines changed

Orange/widgets/data/owfile.py

Lines changed: 64 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
# module's namespace so that old saved settings still work
3939
from Orange.widgets.utils.filedialogs import RecentPath
4040

41-
DEFAULT_READER_TEXT = "Automatically detect type"
41+
DEFAULT_READER_TEXT = "Determine type from the file extension"
4242

4343
log = logging.getLogger(__name__)
4444

@@ -147,8 +147,11 @@ class Warning(widget.OWWidget.Warning):
147147
class Error(widget.OWWidget.Error):
148148
file_not_found = Msg("File not found.")
149149
missing_reader = Msg("Missing reader.")
150+
select_file_type = Msg("Select file type.")
150151
sheet_error = Msg("Error listing available sheets.")
151152
unknown = Msg("Read error:\n{}")
153+
unknown_select = Msg(
154+
"Read error, possibly due to incorrect choice of file type:\n{}")
152155

153156
UserAdviceMessages = [
154157
widget.Message(
@@ -264,7 +267,7 @@ def package(w):
264267
self.reader_combo = QComboBox(self)
265268
self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed)
266269
self.reader_combo.setMinimumSize(QSize(100, 1))
267-
self.reader_combo.activated[int].connect(self.select_reader)
270+
self.reader_combo.activated[int].connect(self.on_reader_change)
268271

269272
box.layout().addWidget(self.reader_combo)
270273
layout.addWidget(box, 0, 1)
@@ -327,6 +330,10 @@ def select_sheet(self):
327330
self.recent_paths[0].sheet = self.sheet_combo.currentText()
328331
self.load_data()
329332

333+
def on_reader_change(self, n):
334+
self.select_reader(n)
335+
self.load_data()
336+
330337
def select_reader(self, n):
331338
if self.source != self.LOCAL_FILE:
332339
return # ignore for URLs
@@ -335,14 +342,11 @@ def select_reader(self, n):
335342
path = self.recent_paths[0]
336343
if n == 0: # default
337344
path.file_format = None
338-
self.load_data()
339345
elif n <= len(self.available_readers):
340346
reader = self.available_readers[n - 1]
341347
path.file_format = reader.qualified_name()
342-
self.load_data()
343348
else: # the rest include just qualified names
344349
path.file_format = self.reader_combo.itemText(n)
345-
self.load_data()
346350

347351
def _url_set(self):
348352
index = self.url_combo.currentIndex()
@@ -373,7 +377,9 @@ def browse_file(self, in_demos=False):
373377
else:
374378
start_file = self.last_path() or os.path.expanduser("~/")
375379

376-
filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers)
380+
filename, reader, _ = open_filename_dialog(
381+
start_file, None, self.available_readers,
382+
add_all="*")
377383
if not filename:
378384
return
379385
self.add_path(filename)
@@ -415,20 +421,20 @@ def _try_load(self):
415421
if not url:
416422
return self.Information.no_file_selected
417423

418-
def mark_problematic_reader():
419-
self.reader_combo.setItemData(self.reader_combo.currentIndex(),
420-
QBrush(Qt.red), Qt.ForegroundRole)
421-
422424
try:
423425
self.reader = self._get_reader() # also sets current reader index
424426
assert self.reader is not None
425427
except MissingReaderException:
426-
mark_problematic_reader()
427-
return self.Error.missing_reader
428+
if self.reader_combo.currentIndex() > 0:
429+
return self.Error.missing_reader
430+
else:
431+
return self.Error.select_file_type
428432
except Exception as ex:
429-
mark_problematic_reader()
430433
log.exception(ex)
431-
return lambda x=ex: self.Error.unknown(str(x))
434+
if self.reader_combo.currentIndex() > 0:
435+
return lambda x=ex: self.Error.unknown(str(x))
436+
else:
437+
return lambda x=ex: self.Error.unknown_select(str(x))
432438

433439
try:
434440
self._update_sheet_combo()
@@ -439,7 +445,6 @@ def mark_problematic_reader():
439445
try:
440446
data = self.reader.read()
441447
except Exception as ex:
442-
mark_problematic_reader()
443448
log.exception(ex)
444449
return lambda x=ex: self.Error.unknown(str(x))
445450
if warnings:
@@ -455,9 +460,25 @@ def mark_problematic_reader():
455460
return None
456461

457462
def _get_reader(self) -> FileFormat:
463+
"""
464+
Get the reader for the current file.
465+
466+
For local files, this also observes the stored settings and the reader
467+
combo, as follows:
468+
469+
1. If the file format is known (from stored settings), use it and set
470+
the reader combo to the corresponding index (as in settings)
471+
2. Otherwise, detect it from the extension and set the combo to
472+
Auto detect, overriding any previous user-set choice
473+
3. Otherwise, use the current combo state.
474+
475+
Returns:
476+
FileFormat: reader instance
477+
"""
458478
if self.source == self.LOCAL_FILE:
459479
path = self.last_path()
460480
self.reader_combo.setEnabled(True)
481+
461482
if self.recent_paths and self.recent_paths[0].file_format:
462483
qname = self.recent_paths[0].file_format
463484
qname_index = {r.qualified_name(): i for i, r in enumerate(self.available_readers)}
@@ -473,9 +494,20 @@ def _get_reader(self) -> FileFormat:
473494
except Exception as ex:
474495
raise MissingReaderException(f'Can not find reader "{qname}"') from ex
475496
reader = reader_class(path)
497+
476498
else:
477-
self.reader_combo.setCurrentIndex(0)
478-
reader = FileFormat.get_reader(path)
499+
old_idx = self.reader_combo.currentIndex()
500+
try:
501+
self.reader_combo.setCurrentIndex(0)
502+
reader = FileFormat.get_reader(path)
503+
except MissingReaderException:
504+
if old_idx == 0:
505+
raise
506+
# Set the path for the current file format,
507+
# and repeat the call to return the corresponding reader
508+
self.select_reader(old_idx)
509+
return self._get_reader()
510+
479511
if self.recent_paths and self.recent_paths[0].sheet:
480512
reader.select_sheet(self.recent_paths[0].sheet)
481513
return reader
@@ -504,12 +536,21 @@ def _select_active_sheet(self):
504536
self.sheet_combo.setCurrentIndex(0)
505537

506538
def _initialize_reader_combo(self):
507-
self.reader_combo.clear()
508-
filters = [format_filter(f) for f in self.available_readers]
509-
self.reader_combo.addItems([DEFAULT_READER_TEXT] + filters)
510-
self.reader_combo.setCurrentIndex(0)
511-
self.reader_combo.setDisabled(True)
512-
# additional readers may be added in self._get_reader()
539+
# Reset to initial state without losing the current index or
540+
# emitting any signals.
541+
combo = self.reader_combo
542+
if not combo.count():
543+
filters = [format_filter(f) for f in self.available_readers]
544+
combo.addItems([DEFAULT_READER_TEXT] + filters)
545+
combo.setCurrentIndex(0)
546+
else:
547+
# additional readers may be added in self._get_reader()
548+
n = len(self.available_readers) + 1
549+
if combo.currentIndex() >= n:
550+
combo.setCurrentIndex(0)
551+
while combo.count() > n:
552+
combo.removeItem(combo.count() - 1)
553+
combo.setDisabled(True)
513554

514555
@staticmethod
515556
def _describe(table):

Orange/widgets/data/tests/test_owfile.py

Lines changed: 119 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
Domain, DiscreteVariable, ContinuousVariable
2424
from Orange.util import OrangeDeprecationWarning
2525

26-
from Orange.data.io import TabReader
26+
from Orange.data.io import TabReader, XlsReader
2727
from Orange.tests import named_file
2828
from Orange.widgets.data.owfile import OWFile, OWFileDropHandler, DEFAULT_READER_TEXT
2929
from Orange.widgets.utils.filedialogs import dialog_formats, format_filter, RecentPath
@@ -361,13 +361,13 @@ def test_reader_custom_tab(self):
361361
outdata = self.get_output(self.widget.Outputs.data)
362362
self.assertEqual(len(outdata), 150) # loaded iris
363363

364-
def test_no_reader_extension(self):
364+
def test_unknown_extension(self):
365365
with named_file("", suffix=".xyz_unknown") as fn:
366366
no_reader = RecentPath(fn, None, None)
367367
self.widget = self.create_widget(OWFile,
368368
stored_settings={"recent_paths": [no_reader]})
369369
self.widget.load_data()
370-
self.assertTrue(self.widget.Error.missing_reader.is_shown())
370+
self.assertTrue(self.widget.Error.select_file_type.is_shown())
371371

372372
def test_fail_sheets(self):
373373
with named_file("", suffix=".failed_sheet") as fn:
@@ -418,6 +418,22 @@ def test_no_specified_reader(self):
418418
self.assertTrue(self.widget.Error.missing_reader.is_shown())
419419
self.assertEqual(self.widget.reader_combo.currentText(), "not.a.file.reader.class")
420420

421+
422+
def _select_reader(self, name):
    """Activate the first reader-combo entry whose text starts with ``name``.

    Sets the combo's current index to the matching entry and emits
    ``activated`` with that index, mimicking a user picking the entry.

    Args:
        name: prefix of the combo item text to look for.

    Raises:
        AssertionError: if no combo entry starts with ``name``.
    """
    reader_combo = self.widget.reader_combo
    for i in range(len(reader_combo)):
        if reader_combo.itemText(i).startswith(name):
            break
    else:
        # `assert "<non-empty string>"` is always true and would let the
        # helper silently activate the wrong entry; fail explicitly instead.
        raise AssertionError(f"No reader starts with {name!r}")
    reader_combo.setCurrentIndex(i)
    reader_combo.activated.emit(i)
433+
434+
def _select_tab_reader(self):
435+
self._select_reader("Tab-separated")
436+
421437
def test_select_reader(self):
422438
filename = FileFormat.locate("iris.tab", dataset_dirs)
423439

@@ -436,12 +452,7 @@ def test_select_reader(self):
436452
self.assertEqual(self.widget.reader_combo.currentText(), "not.a.file.reader.class")
437453
self.assertEqual(self.widget.reader, None)
438454

439-
# select the tab reader
440-
for i in range(len_with_qname):
441-
text = self.widget.reader_combo.itemText(i)
442-
if text.startswith("Tab-separated"):
443-
break
444-
self.widget.reader_combo.activated.emit(i)
455+
self._select_tab_reader()
445456
self.assertEqual(len(self.widget.reader_combo), len_with_qname - 1)
446457
self.assertTrue(self.widget.reader_combo.currentText().startswith("Tab-separated"))
447458
self.assertIsInstance(self.widget.reader, TabReader)
@@ -452,6 +463,105 @@ def test_select_reader(self):
452463
self.assertEqual(self.widget.reader_combo.currentText(), DEFAULT_READER_TEXT)
453464
self.assertIsInstance(self.widget.reader, TabReader)
454465

466+
def test_auto_detect_and_override(self):
467+
tab_as_xlsx = FileFormat.locate("actually-a-tab-file.xlsx", dataset_dirs)
468+
iris = FileFormat.locate("iris", dataset_dirs)
469+
470+
reader_combo = self.widget.reader_combo
471+
472+
reader_combo.setCurrentIndex(0)
473+
reader_combo.activated.emit(0)
474+
assert (self.widget.reader_combo.currentText()
475+
== "Determine type from the file extension")
476+
477+
def open_file(_a, _b, _c, filters, _e):
478+
return filename, filters.split(";;")[0]
479+
480+
with patch("AnyQt.QtWidgets.QFileDialog.getOpenFileName",
481+
open_file):
482+
483+
# Loading a tab file with extension xlsx fails with auto-detect
484+
filename = tab_as_xlsx
485+
self.widget.browse_file()
486+
487+
self.assertEqual(self.widget.reader_combo.currentText(),
488+
"Determine type from the file extension")
489+
self.assertTrue(self.widget.Error.unknown_select.is_shown())
490+
self.assertIsNone(self.get_output(self.widget.Outputs.data))
491+
492+
# Select the tab reader: it should work
493+
self._select_tab_reader()
494+
assert "Tab-separated" in self.widget.reader_combo.currentText()
495+
496+
self.assertFalse(self.widget.Error.unknown_select.is_shown())
497+
self.assertIsInstance(self.widget.reader, TabReader)
498+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
499+
500+
# Switching to iris resets the combo to auto-detect
501+
filename = iris
502+
self.widget.browse_file()
503+
self.assertEqual(self.widget.reader_combo.currentText(),
504+
"Determine type from the file extension")
505+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
506+
507+
# Taking the tab-as-xlsx file from recent paths should restore
508+
# the file type for that file
509+
self.widget.file_combo.setCurrentIndex(1)
510+
self.widget.file_combo.activated.emit(1)
511+
self.assertIn("Tab-separated", self.widget.reader_combo.currentText())
512+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
513+
514+
# Reloading should work
515+
self.widget.load_data()
516+
self.assertIn("Tab-separated", self.widget.reader_combo.currentText())
517+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
518+
519+
# Loading this file - not from history - should fail
520+
filename = tab_as_xlsx
521+
self.widget.browse_file()
522+
self.assertTrue(self.widget.Error.unknown_select.is_shown())
523+
self.assertIsNone(self.get_output(self.widget.Outputs.data))
524+
525+
# Set the correct type again (preparation for the next test block)
526+
self._select_tab_reader()
527+
assert not self.widget.Error.unknown_select.is_shown()
528+
assert isinstance(self.widget.reader, TabReader)
529+
assert self.get_output(self.widget.Outputs.data) is not None
530+
531+
# Now load a real Excel file: this is a known excention so the combo
532+
# should return to auto-detect
533+
filename = FileFormat.locate("an_excel_file.xlsx", dataset_dirs)
534+
self.widget.browse_file()
535+
self.assertEqual(self.widget.reader_combo.currentText(),
536+
"Determine type from the file extension")
537+
self.assertFalse(self.widget.Error.unknown_select.is_shown())
538+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
539+
540+
# Load iris to prepare for the next test block
541+
filename = iris
542+
self.widget.browse_file()
543+
assert (self.widget.reader_combo.currentText()
544+
== "Determine type from the file extension")
545+
assert self.get_output(self.widget.Outputs.data) is not None
546+
547+
# Files with unknown extensions require manual selection
548+
filename = FileFormat.locate("an_excel_file.foo", dataset_dirs)
549+
self.widget.browse_file()
550+
self.assertTrue(self.widget.Error.select_file_type.is_shown())
551+
self.assertIsNone(self.get_output(self.widget.Outputs.data))
552+
553+
self._select_reader("Excel")
554+
self.assertFalse(self.widget.Error.unknown_select.is_shown())
555+
self.assertFalse(self.widget.Error.select_file_type.is_shown())
556+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
557+
558+
# Consecutive loading of files with the same extension keeps selection
559+
filename = FileFormat.locate("an_excel_file-too.foo", dataset_dirs)
560+
self.widget.browse_file()
561+
self.assertFalse(self.widget.Error.unknown_select.is_shown())
562+
self.assertFalse(self.widget.Error.select_file_type.is_shown())
563+
self.assertIsNotNone(self.get_output(self.widget.Outputs.data))
564+
455565
def test_select_reader_errors(self):
456566
filename = FileFormat.locate("iris.tab", dataset_dirs)
457567

i18n/si/msgs.jaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6161,7 +6161,7 @@ widgets/data/owfeaturestatistics.py:
61616161
__main__: false
61626162
iris: false
61636163
widgets/data/owfile.py:
6164-
Automatically detect type: Samodejno zaznaj vrsto datoteke
6164+
Determine type from the file extension: Določi vrsto iz končnice datoteke
61656165
def `add_origin`:
61666166
type: false
61676167
origin: false
@@ -6196,8 +6196,10 @@ widgets/data/owfile.py:
61966196
class `Error`:
61976197
File not found.: Datoteka ni najdena.
61986198
Missing reader.: Bralnik za ta tip ne obstaja.
6199+
Select file type.: Izberite vrsto datoteke.
61996200
Error listing available sheets.: Napaka ob ustvarjanju seznama listov.
62006201
Read error:\n{}: Napaka ob branju:\n{}
6202+
Read error, possibly due to incorrect choice of file type:\n{}: Napaka ob branju, morda zaradi napačne izbire vrste datoteke:\n{}
62016203
'Use CSV File Import widget for advanced options ': 'Uporabi Bralnik CSV za napredne možnosti '
62026204
for comma-separated files: za datoteke ločene z vejico
62036205
use-csv-file-import: false
@@ -6232,10 +6234,11 @@ widgets/data/owfile.py:
62326234
File: Datoteka
62336235
Cannot find the directory with documentation datasets: Ne najdem mape s podatki iz dokumentacije
62346236
~/: false
6237+
*: false
62356238
def `load_data`:
62366239
No data.: Ni podatkov.
62376240
def `_get_reader`:
6238-
Can not find reader "{qname}": Bralnik "{qname}" ne obstaja.
6241+
Can not find reader "{qname}": Ne najdem bralnika "{qname}"
62396242
def `_describe`:
62406243
attributes: false
62416244
Name: Ime

0 commit comments

Comments
 (0)