Skip to content

Commit b77e976

Browse files
committed
Utils: Introduce common dense/sparse operations
1 parent 6bf05ae commit b77e976

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

Orange/data/util.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,58 @@
22
Data-manipulation utilities.
33
"""
44
import numpy as np
5+
import scipy.sparse as sp
56
import bottleneck as bn
67

78

9+
def _nan_min_max(x, axis=0, func=None):
    """Apply a NaN-aware min/max reduction to a dense or sparse matrix.

    Parameters
    ----------
    x : numpy.ndarray or scipy.sparse matrix
        Data to reduce.
    axis : int
        For dense input it is forwarded unchanged to ``func``. For sparse
        input ``0`` yields one result per column; any other value yields
        one result per row.
    func : callable
        Reduction such as ``np.nanmin`` or ``np.nanmax``. It has to be
        supplied by the caller; the ``None`` default is not callable.

    Returns
    -------
    The result of ``func(x, axis=axis)`` for dense input, or a 1-D
    ``numpy.ndarray`` with one extreme per row/column for sparse input.
    """
    if not sp.issparse(x):
        return func(x, axis=axis)

    # Work on CSR so that every line (row, or column after transposing) is a
    # contiguous slice of ``data``. Converting also makes formats that cannot
    # be sliced by row (e.g. COO) usable.
    lines = (x.T if axis == 0 else x).tocsr()
    line_length = lines.shape[1]
    zero = lines.dtype.type(0)

    extremes = []
    for start, stop in zip(lines.indptr[:-1], lines.indptr[1:]):
        stored = lines.data[start:stop]
        # Entries that are not stored are implicit zeros and must take part
        # in the comparison.
        has_implicit_zeros = stored.size != line_length
        if stored.size == 0 and has_implicit_zeros:
            # The line consists of zeros only; calling ``func`` on an empty
            # array would raise ValueError.
            extremes.append(zero)
            continue
        extreme = func(stored)
        if has_implicit_zeros:
            extreme = func([0, extreme])
        extremes.append(extreme)
    return np.array(extremes)
27+
28+
29+
def nan_min(x, axis):
    """Return the minimum of ``x`` along ``axis``, computed with ``np.nanmin``.

    ``x`` may be dense or sparse; the work is delegated to
    ``_nan_min_max``.
    """
    reducer = np.nanmin
    return _nan_min_max(x, axis=axis, func=reducer)
31+
32+
33+
def nan_max(x, axis):
    """Return the maximum of ``x`` along ``axis``, computed with ``np.nanmax``.

    ``x`` may be dense or sparse; the work is delegated to
    ``_nan_min_max``.
    """
    reducer = np.nanmax
    return _nan_min_max(x, axis=axis, func=reducer)
35+
36+
37+
def nan_average(x):
    """Return the mean of all elements of ``x``, ignoring NaN values.

    Parameters
    ----------
    x : numpy.ndarray or scipy.sparse matrix
        Dense or sparse data. For sparse input, entries that are not stored
        count as zeros.

    Returns
    -------
    The mean over all non-NaN elements.
    """
    if not sp.issparse(x):
        # np.average would propagate NaN; nanmean skips it, which matches
        # the sparse branch below.
        return np.nanmean(x)

    # Every cell except the stored NaNs contributes to the mean; implicit
    # zeros add nothing to the sum but do count in the denominator.
    n_values = np.prod(x.shape) - np.sum(np.isnan(x.data))
    return np.nansum(x.data) / n_values
43+
44+
45+
def unique(x, return_counts=True):
46+
if not sp.issparse(x):
47+
return np.unique(x, return_counts=return_counts)
48+
else:
49+
n_zeros = np.prod(x.shape) - x.data.size
50+
r = np.unique(x.data, return_counts=return_counts)
51+
if return_counts:
52+
return np.insert(r[0], 0, 0), np.insert(r[1], 0, n_zeros)
53+
else:
54+
return np.insert(r, 0, 0)
55+
56+
857
def one_hot(values, dtype=float):
958
"""Return a one-hot transform of values
1059

0 commit comments

Comments
 (0)