Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
ContinuousVariable, DiscreteVariable, MISSING_VALUES
)
from Orange.data.util import SharedComputeValue, vstack, hstack
from Orange.data.util import SharedComputeValue
from Orange.statistics.util import bincount, countnans, contingency, \
stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \
sparse_implicit_zero_weights
stats as fast_stats, sparse_has_implicit_zeros, \
sparse_count_implicit_zeros, \
sparse_implicit_zero_weights, vstack, hstack
from Orange.util import flatten

__all__ = ["dataset_dirs", "get_sample_datasets_dir", "RowInstance", "Table"]
Expand Down
31 changes: 3 additions & 28 deletions Orange/data/util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""
Data-manipulation utilities.
"""
import numpy as np
import bottleneck as bn
from scipy import sparse as sp
import numpy as np
# Backwards compatibility
from Orange.statistics.util import hstack, vstack # pylint: disable=unused-import


def one_hot(values, dtype=float):
Expand Down Expand Up @@ -63,29 +64,3 @@ def compute(self, data, shared_data):
"""Given precomputed shared data, perform variable-specific
part of computation and return new variable values."""
raise NotImplementedError


def vstack(arrays):
    """vstack that supports sparse and dense arrays

    If all arrays are dense, result is dense. Otherwise, every array is
    converted to csr and the result is a sparse (csr) array.

    Parameters
    ----------
    arrays : iterable of np.ndarray or scipy.sparse matrix
        Arrays to stack row-wise. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    np.ndarray or scipy.sparse.csr_matrix
    """
    # The input is traversed twice (sparsity check, then stacking), so it is
    # materialized first; otherwise a generator would already be exhausted
    # by the time it reaches the stacking call.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csr explicitly so the documented format is guaranteed.
        return sp.vstack([sp.csr_matrix(arr) for arr in arrays], format="csr")
    return np.vstack(arrays)


def hstack(arrays):
    """hstack that supports sparse and dense arrays

    If all arrays are dense, result is dense. Otherwise, every array is
    converted to csc and the result is a sparse (csc) array.

    Parameters
    ----------
    arrays : iterable of np.ndarray or scipy.sparse matrix
        Arrays to stack column-wise. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    np.ndarray or scipy.sparse.csc_matrix
    """
    # The input is traversed twice (sparsity check, then stacking), so it is
    # materialized first; otherwise a generator would already be exhausted
    # by the time it reaches the stacking call.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csc explicitly so the documented format is guaranteed.
        return sp.hstack([sp.csc_matrix(arr) for arr in arrays], format="csc")
    return np.hstack(arrays)
26 changes: 26 additions & 0 deletions Orange/statistics/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,3 +519,29 @@ def var(x, axis=None):
result = x.multiply(x).mean(axis) - np.square(x.mean(axis))
result = np.squeeze(np.asarray(result))
return result


def vstack(arrays):
    """vstack that supports sparse and dense arrays

    If all arrays are dense, result is dense. Otherwise, every array is
    converted to csr and the result is a sparse (csr) array.

    Parameters
    ----------
    arrays : iterable of np.ndarray or scipy.sparse matrix
        Arrays to stack row-wise. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    np.ndarray or scipy.sparse.csr_matrix
    """
    # The input is traversed twice (sparsity check, then stacking), so it is
    # materialized first; otherwise a generator would already be exhausted
    # by the time it reaches the stacking call.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csr explicitly so the documented format is guaranteed.
        return sp.vstack([sp.csr_matrix(arr) for arr in arrays], format="csr")
    return np.vstack(arrays)


def hstack(arrays):
    """hstack that supports sparse and dense arrays

    If all arrays are dense, result is dense. Otherwise, every array is
    converted to csc and the result is a sparse (csc) array.

    Parameters
    ----------
    arrays : iterable of np.ndarray or scipy.sparse matrix
        Arrays to stack column-wise. Any iterable is accepted, including
        one-shot iterators and generators.

    Returns
    -------
    np.ndarray or scipy.sparse.csc_matrix
    """
    # The input is traversed twice (sparsity check, then stacking), so it is
    # materialized first; otherwise a generator would already be exhausted
    # by the time it reaches the stacking call.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csc explicitly so the documented format is guaranteed.
        return sp.hstack([sp.csc_matrix(arr) for arr in arrays], format="csc")
    return np.hstack(arrays)
43 changes: 42 additions & 1 deletion Orange/tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from scipy.sparse import csr_matrix, issparse, csc_matrix

from Orange.statistics.util import bincount, countnans, contingency, stats, \
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var, vstack, \
hstack


def dense_sparse(test_case):
Expand Down Expand Up @@ -192,6 +193,46 @@ def test_var(self):
np.var(data, axis=axis)
)

def assert_correct_array_type(self, array, shape, sparsity):
    """Assert that `array` has the given shape and the expected sparsity.

    `sparsity` is compared against "sparse" when `issparse(array)` is true
    and against "dense" otherwise.
    """
    self.assertEqual(array.shape, shape)
    # Conditional expression rather than indexing a list with a bool.
    actual = "sparse" if issparse(array) else "dense"
    self.assertEqual(actual, sparsity)

def test_vstack(self):
    """vstack returns a dense result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = csr_matrix(dense)
    as_csc = csc_matrix(dense)

    # (inputs, expected sparsity); two 2x2 blocks always stack to 4x2.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assert_correct_array_type(
            vstack(blocks), shape=(4, 2), sparsity=expected)

def test_hstack(self):
    """hstack returns a dense result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = csr_matrix(dense)
    as_csc = csc_matrix(dense)

    # (inputs, expected sparsity); two 2x2 blocks always stack to 2x4.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assert_correct_array_type(
            hstack(blocks), shape=(2, 4), sparsity=expected)


class TestDigitize(unittest.TestCase):
def setUp(self):
Expand Down
41 changes: 0 additions & 41 deletions Orange/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from Orange.util import export_globals, flatten, deprecated, try_, deepgetattr, \
OrangeDeprecationWarning
from Orange.data import Table
from Orange.data.util import vstack, hstack
from Orange.statistics.util import stats

SOMETHING = 0xf00babe
Expand Down Expand Up @@ -63,46 +62,6 @@ class a:
self.assertTrue(deepgetattr(a, 'l.__nx__.__x__', 42), 42)
self.assertRaises(AttributeError, lambda: deepgetattr(a, 'l.__nx__.__x__'))

def test_vstack(self):
    """vstack returns a dense result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = sp.csr_matrix(dense)
    as_csc = sp.csc_matrix(dense)

    # (inputs, expected sparsity); two 2x2 blocks always stack to 4x2.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assertCorrectArrayType(
            vstack(blocks), shape=(4, 2), sparsity=expected)

def test_hstack(self):
    """hstack returns a dense result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = sp.csr_matrix(dense)
    as_csc = sp.csc_matrix(dense)

    # (inputs, expected sparsity); two 2x2 blocks always stack to 2x4.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assertCorrectArrayType(
            hstack(blocks), shape=(2, 4), sparsity=expected)

def assertCorrectArrayType(self, array, shape, sparsity):
    """Assert that `array` has the given shape and the expected sparsity.

    `sparsity` is compared against "sparse" when `sp.issparse(array)` is
    true and against "dense" otherwise.
    """
    self.assertEqual(array.shape, shape)
    # Conditional expression rather than indexing a list with a bool.
    actual = "sparse" if sp.issparse(array) else "dense"
    self.assertEqual(actual, sparsity)

@unittest.skipUnless(os.environ.get('ORANGE_DEPRECATIONS_ERROR'),
'ORANGE_DEPRECATIONS_ERROR not set')
def test_raise_deprecations(self):
Expand Down
2 changes: 1 addition & 1 deletion Orange/widgets/data/owmergedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import Orange
from Orange.data import StringVariable, ContinuousVariable
from Orange.data.util import hstack
from Orange.statistics.util import hstack
from Orange.widgets import widget, gui, settings
from Orange.widgets.utils import itemmodels
from Orange.widgets.utils.sql import check_sql_input
Expand Down