Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 20 additions & 19 deletions Orange/widgets/data/owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,21 +579,32 @@ def selectedFileFormat(self) -> FileFormat:

def default_options_for_mime_type(
        path: str, mime_type: str
) -> Options:
    """
    Return the initial import options for the file at `path`.

    The dialect and header flag start from the defaults registered for
    `mime_type` (or `csv.excel()` with a header row when the mime type is
    not registered). They are then refined by sniffing the file, trying
    each candidate encoding in turn. The first encoding for which sniffing
    succeeds is stored in the result; if every attempt fails the mime type
    defaults and "utf-8" are kept.

    Parameters
    ----------
    path : str
        Path of the file to sniff.
    mime_type : str
        Mime type of the selected file format.

    Returns
    -------
    options : Options
        Options carrying the dialect, the encoding and a row
        specification that marks the first row as a header when a header
        is assumed or detected.
    """
    defaults = {
        "text/csv": (csv.excel(), True),
        "text/tab-separated-values": (csv.excel_tab(), True)
    }
    dialect, header, encoding = csv.excel(), True, "utf-8"
    delimiters = None
    try_encodings = ["utf-8", "utf-16", "iso8859-1"]
    if mime_type in defaults:
        dialect, header = defaults[mime_type]
        # Restrict sniffing to the delimiter implied by the mime type.
        delimiters = [dialect.delimiter]

    for candidate in try_encodings:
        try:
            dialect, header = sniff_csv_with_path(
                path, encoding=candidate, delimiters=delimiters)
        except (OSError, UnicodeError, csv.Error):
            # Best effort: keep the current defaults and try the next one.
            continue
        # Sniffing succeeded, so remember the encoding that worked.
        encoding = candidate
        break

    rowspec = [(range(0, 1), RowSpec.Header)] if header else []
    return Options(dialect=dialect, encoding=encoding, rowspec=rowspec)


class OWCSVFileImport(widget.OWWidget):
Expand Down Expand Up @@ -910,11 +921,10 @@ def browse(self, prefixname=None, directory=None):
mb = self._might_be_binary_mb(path)
if mb.exec() == QMessageBox.Cancel:
return
# initialize dialect based on selected format
dialect, header = default_options_for_mime_type(
# initialize options based on selected format
options = default_options_for_mime_type(
path, selected_filter.mime_type,
)
options = None
# Search for path in history.
# If found use the stored params to initialize the import dialog
items = self.itemsFromSettings()
Expand All @@ -923,15 +933,6 @@ def browse(self, prefixname=None, directory=None):
_, options_ = items[idx]
if options_ is not None:
options = options_

if options is None:
if not header:
rowspec = []
else:
rowspec = [(range(0, 1), RowSpec.Header)]
options = Options(
encoding="utf-8", dialect=dialect, rowspec=rowspec)

dlg = CSVImportDialog(
self, windowTitle="Import Options", sizeGripEnabled=True)
dlg.setWindowModality(Qt.WindowModal)
Expand Down
36 changes: 22 additions & 14 deletions Orange/widgets/utils/textimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,9 +991,11 @@ def __resetPreview(self):
base = CachedBytesIOWrapper(self.__sample, self.__buffer)

wrapper = io.TextIOWrapper(
base, encoding=self.encoding(), errors="replace"
base, encoding=self.encoding(),
# use surrogate escape to validate/detect encoding errors in
# delegates
errors="surrogateescape"
)

rows = csv.reader(
wrapper, dialect=self.dialect()
)
Expand Down Expand Up @@ -1372,6 +1374,11 @@ def sizeHint(self):
return sh.expandedTo(QSize(8 * hsection, 20 * vsection))


def is_surrogate_escaped(text: str) -> bool:
    """
    Return True if `text` contains at least one surrogate escape character.

    Surrogate escapes are the lone low surrogates U+DC80..U+DCFF.
    """
    lowest, highest = "\udc80", "\udcff"
    for char in text:
        if lowest <= char <= highest:
            return True
    return False


class PreviewItemDelegate(QStyledItemDelegate):
def initStyleOption(self, option, index):
# type: (QStyleOptionViewItem, QModelIndex) -> None
Expand All @@ -1389,6 +1396,18 @@ def initStyleOption(self, option, index):
if coltype == ColumnType.Numeric or coltype == ColumnType.Time:
option.displayAlignment = Qt.AlignRight | Qt.AlignVCenter

if not self.validate(option.text):
option.palette.setBrush(
QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern)
)
option.palette.setBrush(
QPalette.All, QPalette.HighlightedText,
QBrush(Qt.red, Qt.SolidPattern)
)

def validate(self, value: str) -> bool:  # pylint: disable=no-self-use
    """Return True if `value` contains no surrogate escape characters."""
    has_escapes = is_surrogate_escaped(value)
    return not has_escapes

def helpEvent(self, event, view, option, index):
# type: (QHelpEvent, QAbstractItemView, QStyleOptionViewItem, QModelIndex) -> bool
if event.type() == QEvent.ToolTip:
Expand Down Expand Up @@ -1467,17 +1486,6 @@ def __init__(self, *args, converter=None, **kwargs):
super().__init__(*args, **kwargs)
self.converter = converter or float

def initStyleOption(self, option, index):
super().initStyleOption(option, index)
if not self.validate(option.text):
option.palette.setBrush(
QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern)
)
option.palette.setBrush(
QPalette.All, QPalette.HighlightedText,
QBrush(Qt.red, Qt.SolidPattern)
)

def validate(self, value):
if value in {"NA", "Na", "na", "n/a", "N/A", "?", "", "."}:
return True
Expand All @@ -1486,7 +1494,7 @@ def validate(self, value):
except ValueError:
return False
else:
return True
return super().validate(value)


def number_parser(groupsep, decimalsep):
Expand Down