diff --git a/Orange/widgets/data/owfeaturestatistics.py b/Orange/widgets/data/owfeaturestatistics.py
index a543fea8b4d..f9a60ea1e6b 100644
--- a/Orange/widgets/data/owfeaturestatistics.py
+++ b/Orange/widgets/data/owfeaturestatistics.py
@@ -5,6 +5,7 @@
     or quartile coefficient of dispersion (Q3 - Q1) / (Q3 + Q1)
   - Standard deviation for nominal: try out Variation ratio (1 - n_mode/N)
 """
+
 import datetime
 import locale
 from enum import IntEnum
@@ -12,6 +13,7 @@
 
 import numpy as np
 import scipy.stats as ss
+import scipy.sparse as sp
 from AnyQt.QtCore import Qt, QSize, QRectF, QVariant, QModelIndex, pyqtSlot, \
     QRegExp, QItemSelection, QItemSelectionRange, QItemSelectionModel
 from AnyQt.QtGui import QPainter, QColor
@@ -236,9 +238,18 @@ def __compute_statistics(self):
             continuous_f=lambda x: ut.nanmax(x, axis=0),
             time_f=lambda x: ut.nanmax(x, axis=0),
         )
+
+        # Since scipy apparently can't do mode on sparse matrices, cast it to
+        # dense. This can be very inefficient for large matrices, and should
+        # be changed
+        def __mode(x, *args, **kwargs):
+            if sp.issparse(x):
+                x = x.todense(order="C")
+            return ss.mode(x, *args, **kwargs)[0]
+
         self._center = self.__compute_stat(
             matrices,
-            discrete_f=lambda x: ss.mode(x)[0],
+            discrete_f=lambda x: __mode(x, axis=0),
             continuous_f=lambda x: ut.nanmean(x, axis=0),
             time_f=lambda x: ut.nanmean(x, axis=0),
         )