Skip to content

Commit 755426e

Browse files
authored
Merge pull request #2457 from pavlin-policar/table-sparse-target
[FIX] Table: Fix printing data with sparse Y
2 parents ac078a4 + 830e53a commit 755426e

File tree

2 files changed

+58
-14
lines changed

2 files changed

+58
-14
lines changed

Orange/data/table.py

Lines changed: 25 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -114,24 +114,35 @@ def __setitem__(self, key, value):
114114
def _str(self, limit):
115115
def sp_values(matrix, variables):
116116
if not sp.issparse(matrix):
117+
if matrix.ndim == 1:
118+
matrix = matrix[:, np.newaxis]
117119
return Instance.str_values(matrix[row], variables, limit)
118-
begptr, endptr = matrix.indptr[row:row + 2]
119-
rendptr = endptr if not limit else min(endptr, begptr + 5)
120-
variables = [variables[var]
121-
for var in matrix.indices[begptr:rendptr]]
122-
s = ", ".join(
123-
"{}={}".format(var.name, var.str_val(val))
124-
for var, val in zip(variables, matrix.data[begptr:rendptr]))
125-
if limit and rendptr != endptr:
120+
121+
row_entries, idx = [], 0
122+
while idx < len(variables):
123+
# Make sure to stop printing variables if we limit the output
124+
if limit and len(row_entries) >= 5:
125+
break
126+
127+
var = variables[idx]
128+
if var.is_discrete or matrix[row, idx]:
129+
row_entries.append("%s=%s" % (var.name, var.str_val(matrix[row, idx])))
130+
131+
idx += 1
132+
133+
s = ", ".join(row_entries)
134+
135+
if limit and idx < len(variables):
126136
s += ", ..."
137+
127138
return s
128139

129140
table = self.table
130141
domain = table.domain
131142
row = self.row_index
132143
s = "[" + sp_values(table.X, domain.attributes)
133144
if domain.class_vars:
134-
s += " | " + sp_values(table._Y, domain.class_vars)
145+
s += " | " + sp_values(table.Y, domain.class_vars)
135146
s += "]"
136147
if self._domain.metas:
137148
s += " {" + sp_values(table.metas, domain.metas) + "}"
@@ -178,6 +189,8 @@ def Y(self):
178189
def Y(self, value):
179190
if len(value.shape) == 1:
180191
value = value[:, None]
192+
if sp.issparse(value) and len(self) != value.shape[0]:
193+
value = value.T
181194
self._Y = value
182195

183196
def __new__(cls, *args, **kwargs):
@@ -825,7 +838,7 @@ def __len__(self):
825838
return self.X.shape[0]
826839

827840
def __str__(self):
828-
return "[" + ",\n ".join(str(ex) for ex in self)
841+
return "[" + ",\n ".join(str(ex) for ex in self) + "]"
829842

830843
def __repr__(self):
831844
head = 5
@@ -1498,10 +1511,8 @@ def _compute_contingency(self, col_vars=None, row_var=None):
14981511

14991512
for col_i, arr_i, _ in cont_vars:
15001513
if sp.issparse(arr):
1501-
col_data = arr.data[
1502-
arr.indptr[arr_i]:arr.indptr[arr_i + 1]]
1503-
rows = arr.indices[
1504-
arr.indptr[arr_i]:arr.indptr[arr_i + 1]]
1514+
col_data = arr.data[arr.indptr[arr_i]:arr.indptr[arr_i + 1]]
1515+
rows = arr.indices[arr.indptr[arr_i]:arr.indptr[arr_i + 1]]
15051516
W_ = None if W is None else W[rows]
15061517
classes_ = classes[rows]
15071518
else:

Orange/tests/test_sparse_table.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
# Test methods with long descriptive names can omit docstrings
22
# pylint: disable=missing-docstring
33

4+
import numpy as np
45
from scipy.sparse import csr_matrix
56

67
from Orange import data
8+
from Orange.data import Table
79
from Orange.tests import test_table as tabletests
810

911

@@ -41,3 +43,34 @@ def test_row_assignment(self):
4143

4244
def test_value_assignment(self):
4345
super().test_value_assignment()
46+
47+
def test_str(self):
48+
iris = Table('iris')
49+
iris.X, iris.Y = csr_matrix(iris.X), csr_matrix(iris.Y)
50+
str(iris)
51+
52+
def test_Y_setter_1d(self):
53+
iris = Table('iris')
54+
assert iris.Y.shape == (150,)
55+
iris.Y = csr_matrix(iris.Y)
56+
# We expect Y to have the same number of rows as X, whose shape is (150, 4) in iris
57+
self.assertEqual(iris.Y.shape, (150, 1))
58+
59+
def test_Y_setter_2d(self):
60+
iris = Table('iris')
61+
assert iris.Y.shape == (150,)
62+
# Convert iris.Y to (150, 1) shape
63+
new_y = iris.Y[:, np.newaxis]
64+
iris.Y = np.hstack((new_y, new_y))
65+
iris.Y = csr_matrix(iris.Y)
66+
# We expect Y to have the same number of rows as X, whose shape is (150, 4) in iris
67+
self.assertEqual(iris.Y.shape, (150, 2))
68+
69+
def test_Y_setter_2d_single_instance(self):
70+
iris = Table('iris')[:1]
71+
# Convert iris.Y to (1, 1) shape
72+
new_y = iris.Y[:, np.newaxis]
73+
iris.Y = np.hstack((new_y, new_y))
74+
iris.Y = csr_matrix(iris.Y)
75+
# We expect Y to have the same number of rows as X, whose shape is (1, 4) for this single-row slice of iris
76+
self.assertEqual(iris.Y.shape, (1, 2))

0 commit comments

Comments
 (0)