Skip to content

Commit e3db39a

Browse files
Statistics.stack: Move h/vstack to statistics module.
1 parent 5f3d314 commit e3db39a

File tree

6 files changed

+76
-74
lines changed

6 files changed

+76
-74
lines changed

Orange/data/table.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
1818
ContinuousVariable, DiscreteVariable, MISSING_VALUES
1919
)
20-
from Orange.data.util import SharedComputeValue, vstack, hstack
20+
from Orange.data.util import SharedComputeValue
2121
from Orange.statistics.util import bincount, countnans, contingency, \
22-
stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \
23-
sparse_implicit_zero_weights
22+
stats as fast_stats, sparse_has_implicit_zeros, \
23+
sparse_count_implicit_zeros, \
24+
sparse_implicit_zero_weights, vstack, hstack
2425
from Orange.util import flatten
2526

2627
__all__ = ["dataset_dirs", "get_sample_datasets_dir", "RowInstance", "Table"]

Orange/data/util.py

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""
22
Data-manipulation utilities.
33
"""
4-
import numpy as np
54
import bottleneck as bn
6-
from scipy import sparse as sp
5+
import numpy as np
6+
# Backwards compatibility
7+
from Orange.statistics.util import hstack, vstack # pylint: disable=unused-import
78

89

910
def one_hot(values, dtype=float):
@@ -63,29 +64,3 @@ def compute(self, data, shared_data):
6364
"""Given precomputed shared data, perform variable-specific
6465
part of computation and return new variable values."""
6566
raise NotImplementedError
66-
67-
68-
def vstack(arrays):
69-
"""vstack that supports sparse and dense arrays
70-
71-
If all arrays are dense, result is dense. Otherwise,
72-
result is a sparse (csr) array.
73-
"""
74-
if any(sp.issparse(arr) for arr in arrays):
75-
arrays = [sp.csr_matrix(arr) for arr in arrays]
76-
return sp.vstack(arrays)
77-
else:
78-
return np.vstack(arrays)
79-
80-
81-
def hstack(arrays):
82-
"""hstack that supports sparse and dense arrays
83-
84-
If all arrays are dense, result is dense. Otherwise,
85-
result is a sparse (csc) array.
86-
"""
87-
if any(sp.issparse(arr) for arr in arrays):
88-
arrays = [sp.csc_matrix(arr) for arr in arrays]
89-
return sp.hstack(arrays)
90-
else:
91-
return np.hstack(arrays)

Orange/statistics/util.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,3 +519,29 @@ def var(x, axis=None):
519519
result = x.multiply(x).mean(axis) - np.square(x.mean(axis))
520520
result = np.squeeze(np.asarray(result))
521521
return result
522+
523+
524+
def vstack(arrays):
    """Stack arrays vertically (row-wise); supports sparse and dense arrays.

    If all arrays are dense, the result is a dense numpy array. If at
    least one array is sparse, every array is converted to csr and the
    result is a sparse (csr) matrix.

    Parameters
    ----------
    arrays : iterable of numpy.ndarray or scipy.sparse matrices
        The arrays to stack. Any iterable is accepted, including
        generators.

    Returns
    -------
    numpy.ndarray or scipy.sparse.csr_matrix
    """
    # Materialize once: the sparsity check below would otherwise exhaust a
    # generator before the stacking call gets to see its items.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csr explicitly so the documented result format does not
        # depend on which format scipy picks by default.
        return sp.vstack([sp.csr_matrix(arr) for arr in arrays], format="csr")
    return np.vstack(arrays)
535+
536+
537+
def hstack(arrays):
    """Stack arrays horizontally (column-wise); supports sparse and dense.

    If all arrays are dense, the result is a dense numpy array. If at
    least one array is sparse, every array is converted to csc and the
    result is a sparse (csc) matrix.

    Parameters
    ----------
    arrays : iterable of numpy.ndarray or scipy.sparse matrices
        The arrays to stack. Any iterable is accepted, including
        generators.

    Returns
    -------
    numpy.ndarray or scipy.sparse.csc_matrix
    """
    # Materialize once: the sparsity check below would otherwise exhaust a
    # generator before the stacking call gets to see its items.
    arrays = list(arrays)
    if any(sp.issparse(arr) for arr in arrays):
        # Ask for csc explicitly so the documented result format does not
        # depend on which format scipy picks by default.
        return sp.hstack([sp.csc_matrix(arr) for arr in arrays], format="csc")
    return np.hstack(arrays)

Orange/tests/test_statistics.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from scipy.sparse import csr_matrix, issparse, csc_matrix
99

1010
from Orange.statistics.util import bincount, countnans, contingency, stats, \
11-
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var
11+
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var, vstack, \
12+
hstack
1213

1314

1415
def dense_sparse(test_case):
@@ -192,6 +193,46 @@ def test_var(self):
192193
np.var(data, axis=axis)
193194
)
194195

196+
def assert_correct_array_type(self, array, shape, sparsity):
    """Assert that `array` has the given shape and sparsity.

    `sparsity` is compared against "sparse" when `issparse(array)` is
    true and against "dense" otherwise.
    """
    self.assertEqual(array.shape, shape)
    actual = "sparse" if issparse(array) else "dense"
    self.assertEqual(actual, sparsity)
199+
200+
def test_vstack(self):
    """vstack yields a dense (4, 2) result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = csr_matrix(dense)
    as_csc = csc_matrix(dense)

    # (inputs, expected sparsity) -- checked in this order.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assert_correct_array_type(
            vstack(blocks),
            shape=(4, 2), sparsity=expected)
217+
218+
def test_hstack(self):
    """hstack yields a dense (2, 4) result only when every input is dense."""
    dense = np.array([[1., 2.], [3., 4.]])
    as_csr = csr_matrix(dense)
    as_csc = csc_matrix(dense)

    # (inputs, expected sparsity) -- checked in this order.
    cases = (
        ([dense, dense], "dense"),
        ([as_csr, dense], "sparse"),
        ([dense, as_csc], "sparse"),
        ([as_csc, as_csr], "sparse"),
    )
    for blocks, expected in cases:
        self.assert_correct_array_type(
            hstack(blocks),
            shape=(2, 4), sparsity=expected)
235+
195236

196237
class TestDigitize(unittest.TestCase):
197238
def setUp(self):

Orange/tests/test_util.py

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from Orange.util import export_globals, flatten, deprecated, try_, deepgetattr, \
99
OrangeDeprecationWarning
1010
from Orange.data import Table
11-
from Orange.data.util import vstack, hstack
1211
from Orange.statistics.util import stats
1312

1413
SOMETHING = 0xf00babe
@@ -63,46 +62,6 @@ class a:
6362
self.assertTrue(deepgetattr(a, 'l.__nx__.__x__', 42), 42)
6463
self.assertRaises(AttributeError, lambda: deepgetattr(a, 'l.__nx__.__x__'))
6564

66-
def test_vstack(self):
67-
numpy = np.array([[1., 2.], [3., 4.]])
68-
csr = sp.csr_matrix(numpy)
69-
csc = sp.csc_matrix(numpy)
70-
71-
self.assertCorrectArrayType(
72-
vstack([numpy, numpy]),
73-
shape=(4, 2), sparsity="dense")
74-
self.assertCorrectArrayType(
75-
vstack([csr, numpy]),
76-
shape=(4, 2), sparsity="sparse")
77-
self.assertCorrectArrayType(
78-
vstack([numpy, csc]),
79-
shape=(4, 2), sparsity="sparse")
80-
self.assertCorrectArrayType(
81-
vstack([csc, csr]),
82-
shape=(4, 2), sparsity="sparse")
83-
84-
def test_hstack(self):
85-
numpy = np.array([[1., 2.], [3., 4.]])
86-
csr = sp.csr_matrix(numpy)
87-
csc = sp.csc_matrix(numpy)
88-
89-
self.assertCorrectArrayType(
90-
hstack([numpy, numpy]),
91-
shape=(2, 4), sparsity="dense")
92-
self.assertCorrectArrayType(
93-
hstack([csr, numpy]),
94-
shape=(2, 4), sparsity="sparse")
95-
self.assertCorrectArrayType(
96-
hstack([numpy, csc]),
97-
shape=(2, 4), sparsity="sparse")
98-
self.assertCorrectArrayType(
99-
hstack([csc, csr]),
100-
shape=(2, 4), sparsity="sparse")
101-
102-
def assertCorrectArrayType(self, array, shape, sparsity):
103-
self.assertEqual(array.shape, shape)
104-
self.assertEqual(["dense", "sparse"][sp.issparse(array)], sparsity)
105-
10665
@unittest.skipUnless(os.environ.get('ORANGE_DEPRECATIONS_ERROR'),
10766
'ORANGE_DEPRECATIONS_ERROR not set')
10867
def test_raise_deprecations(self):

Orange/widgets/data/owmergedata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import Orange
1010
from Orange.data import StringVariable, ContinuousVariable
11-
from Orange.data.util import hstack
11+
from Orange.statistics.util import hstack
1212
from Orange.widgets import widget, gui, settings
1313
from Orange.widgets.utils import itemmodels
1414
from Orange.widgets.utils.sql import check_sql_input

0 commit comments

Comments
 (0)