Skip to content

Commit d2286a4

Browse files
Statistics.stack: Move h/vstack to statistics module.
1 parent bc688c8 commit d2286a4

File tree

6 files changed

+80
-77
lines changed

6 files changed

+80
-77
lines changed

Orange/data/table.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
Domain, Variable, Storage, StringVariable, Unknown, Value, Instance,
1818
ContinuousVariable, DiscreteVariable, MISSING_VALUES
1919
)
20-
from Orange.data.util import SharedComputeValue, vstack, hstack
20+
from Orange.data.util import SharedComputeValue
2121
from Orange.statistics.util import bincount, countnans, contingency, \
22-
stats as fast_stats, sparse_has_implicit_zeros, sparse_count_implicit_zeros, \
23-
sparse_implicit_zero_weights
22+
stats as fast_stats, sparse_has_implicit_zeros, \
23+
sparse_count_implicit_zeros, \
24+
sparse_implicit_zero_weights, vstack, hstack
2425
from Orange.util import flatten
2526

2627
__all__ = ["dataset_dirs", "get_sample_datasets_dir", "RowInstance", "Table"]

Orange/data/util.py

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""
22
Data-manipulation utilities.
33
"""
4-
import numpy as np
54
import bottleneck as bn
6-
from scipy import sparse as sp
5+
import numpy as np
6+
# Backwards compatibility
7+
from Orange.statistics.util import hstack, vstack # pylint: disable=unused-import
78

89

910
def one_hot(values, dtype=float):
@@ -63,29 +64,3 @@ def compute(self, data, shared_data):
6364
"""Given precomputed shared data, perform variable-specific
6465
part of computation and return new variable values."""
6566
raise NotImplementedError
66-
67-
68-
def vstack(arrays):
69-
"""vstack that supports sparse and dense arrays
70-
71-
If all arrays are dense, result is dense. Otherwise,
72-
result is a sparse (csr) array.
73-
"""
74-
if any(sp.issparse(arr) for arr in arrays):
75-
arrays = [sp.csr_matrix(arr) for arr in arrays]
76-
return sp.vstack(arrays)
77-
else:
78-
return np.vstack(arrays)
79-
80-
81-
def hstack(arrays):
82-
"""hstack that supports sparse and dense arrays
83-
84-
If all arrays are dense, result is dense. Otherwise,
85-
result is a sparse (csc) array.
86-
"""
87-
if any(sp.issparse(arr) for arr in arrays):
88-
arrays = [sp.csc_matrix(arr) for arr in arrays]
89-
return sp.hstack(arrays)
90-
else:
91-
return np.hstack(arrays)

Orange/statistics/util.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
It also patches bottleneck to contain these functions.
66
"""
77
from warnings import warn
8-
import numpy as np
9-
import scipy.sparse as sp
8+
109
import bottleneck as bn
10+
import numpy as np
11+
from scipy import sparse as sp
1112

1213

1314
def _count_nans_per_row_sparse(X, weights, dtype=None):
@@ -520,3 +521,29 @@ def var(x, axis=None):
520521
result = x.multiply(x).mean(axis) - np.square(x.mean(axis))
521522
result = np.squeeze(np.asarray(result))
522523
return result
524+
525+
526+
def vstack(arrays):
527+
"""vstack that supports sparse and dense arrays
528+
529+
If all arrays are dense, result is dense. Otherwise,
530+
result is a sparse (csr) array.
531+
"""
532+
if any(sp.issparse(arr) for arr in arrays):
533+
arrays = [sp.csr_matrix(arr) for arr in arrays]
534+
return sp.vstack(arrays)
535+
else:
536+
return np.vstack(arrays)
537+
538+
539+
def hstack(arrays):
540+
"""hstack that supports sparse and dense arrays
541+
542+
If all arrays are dense, result is dense. Otherwise,
543+
result is a sparse (csc) array.
544+
"""
545+
if any(sp.issparse(arr) for arr in arrays):
546+
arrays = [sp.csc_matrix(arr) for arr in arrays]
547+
return sp.hstack(arrays)
548+
else:
549+
return np.hstack(arrays)

Orange/tests/test_statistics.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from scipy.sparse import csr_matrix, issparse, lil_matrix, csc_matrix
77

88
from Orange.statistics.util import bincount, countnans, contingency, stats, \
9-
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var
9+
nanmin, nanmax, unique, nanunique, mean, nanmean, digitize, var, vstack, \
10+
hstack
1011

1112

1213
def dense_sparse(test_case):
@@ -156,6 +157,46 @@ def test_var(self):
156157
np.var(data, axis=axis)
157158
)
158159

160+
def assert_correct_array_type(self, array, shape, sparsity):
161+
self.assertEqual(array.shape, shape)
162+
self.assertEqual(["dense", "sparse"][issparse(array)], sparsity)
163+
164+
def test_vstack(self):
165+
numpy = np.array([[1., 2.], [3., 4.]])
166+
csr = csr_matrix(numpy)
167+
csc = csc_matrix(numpy)
168+
169+
self.assert_correct_array_type(
170+
vstack([numpy, numpy]),
171+
shape=(4, 2), sparsity="dense")
172+
self.assert_correct_array_type(
173+
vstack([csr, numpy]),
174+
shape=(4, 2), sparsity="sparse")
175+
self.assert_correct_array_type(
176+
vstack([numpy, csc]),
177+
shape=(4, 2), sparsity="sparse")
178+
self.assert_correct_array_type(
179+
vstack([csc, csr]),
180+
shape=(4, 2), sparsity="sparse")
181+
182+
def test_hstack(self):
183+
numpy = np.array([[1., 2.], [3., 4.]])
184+
csr = csr_matrix(numpy)
185+
csc = csc_matrix(numpy)
186+
187+
self.assert_correct_array_type(
188+
hstack([numpy, numpy]),
189+
shape=(2, 4), sparsity="dense")
190+
self.assert_correct_array_type(
191+
hstack([csr, numpy]),
192+
shape=(2, 4), sparsity="sparse")
193+
self.assert_correct_array_type(
194+
hstack([numpy, csc]),
195+
shape=(2, 4), sparsity="sparse")
196+
self.assert_correct_array_type(
197+
hstack([csc, csr]),
198+
shape=(2, 4), sparsity="sparse")
199+
159200

160201
class TestDigitize(unittest.TestCase):
161202
def setUp(self):

Orange/tests/test_util.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
from Orange.util import export_globals, flatten, deprecated, try_, deepgetattr, \
99
OrangeDeprecationWarning
1010
from Orange.data import Table
11-
from Orange.data.util import vstack, hstack
12-
from Orange.statistics.util import stats
11+
from Orange.statistics.util import stats, vstack, hstack
1312

1413
SOMETHING = 0xf00babe
1514

@@ -63,46 +62,6 @@ class a:
6362
self.assertTrue(deepgetattr(a, 'l.__nx__.__x__', 42), 42)
6463
self.assertRaises(AttributeError, lambda: deepgetattr(a, 'l.__nx__.__x__'))
6564

66-
def test_vstack(self):
67-
numpy = np.array([[1., 2.], [3., 4.]])
68-
csr = sp.csr_matrix(numpy)
69-
csc = sp.csc_matrix(numpy)
70-
71-
self.assertCorrectArrayType(
72-
vstack([numpy, numpy]),
73-
shape=(4, 2), sparsity="dense")
74-
self.assertCorrectArrayType(
75-
vstack([csr, numpy]),
76-
shape=(4, 2), sparsity="sparse")
77-
self.assertCorrectArrayType(
78-
vstack([numpy, csc]),
79-
shape=(4, 2), sparsity="sparse")
80-
self.assertCorrectArrayType(
81-
vstack([csc, csr]),
82-
shape=(4, 2), sparsity="sparse")
83-
84-
def test_hstack(self):
85-
numpy = np.array([[1., 2.], [3., 4.]])
86-
csr = sp.csr_matrix(numpy)
87-
csc = sp.csc_matrix(numpy)
88-
89-
self.assertCorrectArrayType(
90-
hstack([numpy, numpy]),
91-
shape=(2, 4), sparsity="dense")
92-
self.assertCorrectArrayType(
93-
hstack([csr, numpy]),
94-
shape=(2, 4), sparsity="sparse")
95-
self.assertCorrectArrayType(
96-
hstack([numpy, csc]),
97-
shape=(2, 4), sparsity="sparse")
98-
self.assertCorrectArrayType(
99-
hstack([csc, csr]),
100-
shape=(2, 4), sparsity="sparse")
101-
102-
def assertCorrectArrayType(self, array, shape, sparsity):
103-
self.assertEqual(array.shape, shape)
104-
self.assertEqual(["dense", "sparse"][sp.issparse(array)], sparsity)
105-
10665
@unittest.skipUnless(os.environ.get('ORANGE_DEPRECATIONS_ERROR'),
10766
'ORANGE_DEPRECATIONS_ERROR not set')
10867
def test_raise_deprecations(self):

Orange/widgets/data/owmergedata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import Orange
1010
from Orange.data import StringVariable, ContinuousVariable
11-
from Orange.data.util import hstack
11+
from Orange.statistics.util import hstack
1212
from Orange.widgets import widget, gui, settings
1313
from Orange.widgets.utils import itemmodels
1414
from Orange.widgets.utils.sql import check_sql_input

0 commit comments

Comments
 (0)