Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Orange/data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ class SharedComputeValue:
A callable that performs computation that is shared between
multiple variables. Variables sharing computation need to set
the same instance.
variable: Orange.data.Variable
The original variable on which this compute value is set.
"""

def __init__(self, compute_shared):
def __init__(self, compute_shared, variable=None):
self.compute_shared = compute_shared
self.variable = variable

def __call__(self, data, shared_data=None):
"""Fallback if common parts are not passed."""
Expand Down
78 changes: 78 additions & 0 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,81 @@ def weighted_mean():
np.zeros(X.shape[1]),
nans,
X.shape[0] - nans))


def _sparse_has_zeros(x):
""" Check if sparse matrix contains any implicit zeros. """
return np.prod(x.shape) != x.nnz


def _nan_min_max(x, func, axis=0):
    """ Shared implementation of nanmin/nanmax for sparse or dense matrices.

    Implicit zeros of a sparse matrix take part in the reduction, so a
    sparse row that stores only positive values still has minimum 0.

    Parameters
    ----------
    x : np.ndarray or scipy.sparse matrix
    func : callable
        A nan-ignoring reduction, i.e. np.nanmin or np.nanmax.
    axis : int or None
        None reduces the whole matrix to a scalar; 0 reduces over
        columns, 1 over rows (numpy convention).
    """
    if not sp.issparse(x):
        return func(x, axis=axis)
    if axis is None:
        # Reduce explicit values first; a matrix with no stored values
        # has no data to reduce, hence nan as the neutral placeholder.
        extreme = func(x.data, axis=axis) if x.nnz else float('nan')
        if _sparse_has_zeros(x):
            # nanmin/nanmax of [0, nan] is 0, so an all-implicit matrix
            # correctly reduces to 0 here.
            extreme = func([0, extreme])
        return extreme
    if axis == 0:
        x = x.T
    else:
        assert axis == 1

    # Ensure a row-efficient format: transposing csr yields csc, and the
    # caller may also pass e.g. coo, for which per-row iteration below
    # would be slow or unsupported.
    x = x.tocsr()

    r = []
    for row in x:
        values = row.data
        extreme = func(values) if values.size else float('nan')
        if _sparse_has_zeros(row):
            extreme = func([0, extreme])
        r.append(extreme)
    return np.array(r)


def nanmin(x, axis=None):
    """ Equivalent of np.nanmin that supports sparse or dense matrices. """
    return _nan_min_max(x, np.nanmin, axis)


def nanmax(x, axis=None):
    """ Equivalent of np.nanmax that supports sparse or dense matrices. """
    return _nan_min_max(x, np.nanmax, axis)


def mean(x):
    """ Equivalent of np.mean that supports sparse or dense matrices. """
    if sp.issparse(x):
        # Implicit zeros contribute nothing to the sum but do count
        # toward the number of values averaged over.
        return np.sum(x.data) / np.prod(x.shape)
    return np.mean(x)


def nanmean(x):
    """ Equivalent of np.nanmean that supports sparse or dense matrices. """
    if sp.issparse(x):
        # nans are always stored explicitly, so only x.data can contain
        # them; implicit zeros all count as valid values.
        valid = np.prod(x.shape) - np.sum(np.isnan(x.data))
        return np.nansum(x.data) / valid
    return np.nanmean(x)


def unique(x, return_counts=True):
    """ Equivalent of np.unique that supports sparse or dense matrices.

    For sparse input, implicit zeros are merged with explicit ones so the
    result matches np.unique on the equivalent dense matrix.

    Parameters
    ----------
    x : np.ndarray or scipy.sparse matrix
    return_counts : bool
        If True, also return the number of occurrences of each value.
    """
    if not sp.issparse(x):
        return np.unique(x, return_counts=return_counts)

    implicit_zeros = np.prod(x.shape) - x.nnz
    # Any stored value equal to 0 means zero already appears in x.data.
    explicit_zeros = not np.all(x.data)
    r = np.unique(x.data, return_counts=return_counts)
    if not implicit_zeros:
        return r
    if return_counts:
        if explicit_zeros:
            # Zero already present among the unique values; just bump
            # its count by the number of implicit zeros.
            r[1][r[0] == 0.] += implicit_zeros
            return r
        # Insert zero at its sorted position (stored values may be
        # negative, so index 0 is not always correct).
        zero_ind = np.searchsorted(r[0], 0)
        return (np.insert(r[0], zero_ind, 0),
                np.insert(r[1], zero_ind, implicit_zeros))
    else:
        if explicit_zeros:
            return r
        zero_ind = np.searchsorted(r, 0)
        return np.insert(r, zero_ind, 0)
85 changes: 82 additions & 3 deletions Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
import unittest
import warnings
import numpy as np
import scipy as sp
from scipy.sparse import csr_matrix

from Orange.statistics.util import bincount, countnans, contingency, stats
from Orange.statistics.util import bincount, countnans, contingency, stats, \
nanmin, nanmax, unique, mean, nanmean


class TestUtil(unittest.TestCase):
def setUp(self):
    """Prepare shared fixtures: a matrix containing nans, an all-zero
    matrix and an all-one matrix (dense; tests sparsify as needed)."""
    nan = float('nan')
    with_nans = np.array([
        [0., 1., 0., nan, 3., 5.],
        [0., 0., nan, nan, 5., nan],
        [0., 0., 0., nan, 7., 6.]])
    self.data = [with_nans, np.zeros((2, 3)), np.ones((2, 3))]

def test_bincount(self):
hist, n_nans = bincount([0., 1., np.nan, 3])
self.assertEqual(n_nans, 1)
Expand Down Expand Up @@ -51,8 +65,8 @@ def test_stats_sparse(self):
np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 2, 1],
[0, 1, 1/3, 0, 2, 1],
[0, 1, 1/3, 0, 2, 1],
[0, 0, 0, 0, 3, 0],
[0, 0, 0, 0, 3, 0]])
[0, 0, 0, 0, 3, 0],
[0, 0, 0, 0, 3, 0]])

def test_stats_weights(self):
X = np.arange(4).reshape(2, 2).astype(float)
Expand All @@ -79,3 +93,68 @@ def test_stats_non_numeric(self):
np.testing.assert_equal(stats(X), [[np.inf, -np.inf, 0, 0, 1, 2],
[np.inf, -np.inf, 0, 0, 1, 2],
[np.inf, -np.inf, 0, 0, 1, 2]])

def test_nanmin_nanmax(self):
    """nanmin/nanmax must agree with numpy on dense input and on an
    equivalent sparse copy, for every axis."""
    for X in self.data:
        X_sparse = csr_matrix(X)
        for axis in (None, 0, 1):
            for our_func, np_func in ((nanmin, np.nanmin),
                                      (nanmax, np.nanmax)):
                expected = np_func(X, axis=axis)
                np.testing.assert_array_equal(
                    our_func(X, axis=axis), expected)
                np.testing.assert_array_equal(
                    our_func(X_sparse, axis=axis), expected)

def test_unique(self):
    """Sparse unique must match np.unique on the dense equivalent."""
    for X in self.data:
        X_sparse = csr_matrix(X)
        np.testing.assert_array_equal(
            unique(X_sparse, return_counts=False),
            np.unique(X, return_counts=False))

        actual = unique(X_sparse, return_counts=True)
        expected = np.unique(X, return_counts=True)
        for got, want in zip(actual, expected):
            np.testing.assert_array_equal(got, want)

def test_unique_explicit_zeros(self):
    """Explicitly stored zeros must be indistinguishable from implicit
    ones in unique()'s output."""
    x1 = csr_matrix(np.eye(3))
    x2 = csr_matrix(np.eye(3))

    # Turn some off-diagonal entries of x2 into explicitly stored zeros.
    # Assigning into a csr matrix triggers SparseEfficiencyWarning, which
    # is irrelevant here, so it is silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", category=sp.sparse.SparseEfficiencyWarning)
        x2[0, 1] = 0
        x2[1, 0] = 0

    np.testing.assert_array_equal(
        unique(x1, return_counts=False),
        unique(x2, return_counts=False),
    )
    np.testing.assert_array_equal(
        unique(x1, return_counts=True),
        unique(x2, return_counts=True),
    )

def test_mean(self):
    """Sparse mean must match np.mean on the dense equivalent."""
    for X in self.data:
        np.testing.assert_array_equal(mean(csr_matrix(X)), np.mean(X))

def test_nanmean(self):
    """Sparse nanmean must match np.nanmean on the dense equivalent."""
    for X in self.data:
        np.testing.assert_array_equal(nanmean(csr_matrix(X)), np.nanmean(X))
22 changes: 14 additions & 8 deletions Orange/widgets/visualize/ownomogram.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import time
from enum import IntEnum
from collections import OrderedDict

import numpy as np

Expand All @@ -12,6 +13,7 @@
from AnyQt.QtCore import Qt, QEvent, QRectF, QSize

from Orange.data import Table, Domain
from Orange.statistics.util import nanmin, nanmax, mean, unique
from Orange.classification import Model
from Orange.classification.naive_bayes import NaiveBayesModel
from Orange.classification.logistic_regression import \
Expand Down Expand Up @@ -867,11 +869,13 @@ def calculate_log_reg_coefficients(self):
self.log_reg_coeffs = [coeffs[:, ranges[i]] for i in range(len(attrs))]
self.log_reg_coeffs_orig = self.log_reg_coeffs.copy()

for i in range(len(self.log_reg_coeffs)):
min_values = nanmin(self.data.X, axis=0)
max_values = nanmax(self.data.X, axis=0)

for i, min_t, max_t in zip(range(len(self.log_reg_coeffs)),
min_values, max_values):
if self.log_reg_coeffs[i].shape[1] == 1:
coef = self.log_reg_coeffs[i]
min_t = np.nanmin(self.data.X, axis=0)[i]
max_t = np.nanmax(self.data.X, axis=0)[i]
self.log_reg_coeffs[i] = np.hstack((coef * min_t, coef * max_t))
self.log_reg_cont_data_extremes.append(
[sorted([min_t, max_t], reverse=(c < 0)) for c in coef])
Expand Down Expand Up @@ -1080,10 +1084,10 @@ def _init_feature_marker_values(self):
value, feature_val = 0, None
if len(self.log_reg_coeffs):
if attr.is_discrete:
ind, n = np.unique(self.data.X[:, i], return_counts=True)
ind, n = unique(self.data.X[:, i], return_counts=True)
feature_val = np.nan_to_num(ind[np.argmax(n)])
else:
feature_val = np.average(self.data.X[:, i])
feature_val = mean(self.data.X[:, i])
inst_in_dom = instances and attr in instances.domain
if inst_in_dom and not np.isnan(instances[0][attr]):
feature_val = instances[0][attr]
Expand All @@ -1108,13 +1112,15 @@ def send_report(self):

@staticmethod
def reconstruct_domain(original, preprocessed):
    """Map preprocessed attributes back to the original domain's
    variables, dropping duplicates while keeping first-seen order."""
    # OrderedDict doubles as an ordered set: O(1) "in" checks plus a
    # stable iteration order for the resulting Domain.
    seen = OrderedDict()
    for attr in preprocessed.attributes:
        cv = attr._compute_value.variable._compute_value
        var = cv.variable if cv else original[attr.name]
        if var not in seen:
            seen[var] = None  # only the keys matter
    return Domain(list(seen.keys()), original.class_var, original.metas)

@staticmethod
Expand Down