Table.from_table: Obey is_sparse when returning subarrays

nikicc · nikicc · commit 8d7d6faebf7a · 2017-06-02T17:16:42.000+02:00
When returning subarrays, the `is_sparse` flag wasn't considered; we simply returned the subarray in its original format. Also, make sure subarrays aren't flattened to 1d; flattening is required only for single columns.
diff --git a/Orange/data/table.py b/Orange/data/table.py
@@ -268,6 +268,27 @@ def from_table(cls, domain, source, row_indices=...):
 
         def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
                         is_sparse=False):
+            def match_type(x, force_1d=False):
+                """ Assure that matrix and column are both dense or sparse.
+
+                Args:
+                    x (np.ndarray, scipy.sparse): data 
+                    force_1d (bool): If set, flatten resulting array to 1d.
+
+                Returns:
+                    array of correct density. 
+                """
+                if is_sparse == sp.issparse(x):
+                    return x
+                if is_sparse:
+                    x = np.asarray(x)
+                    return sp.csc_matrix(x.reshape(-1, 1).astype(np.float))
+                x = x.toarray()
+                if force_1d:
+                    x = np.ravel(x)
+                return x
+
+            match_type_1d = lambda x: match_type(x, force_1d=True)
 
             if not len(src_cols):
                 if is_sparse:
@@ -278,33 +299,24 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
             n_src_attrs = len(source.domain.attributes)
             if all(isinstance(x, Integral) and 0 <= x < n_src_attrs
                    for x in src_cols):
-                return _subarray(source.X, row_indices, src_cols)
+                return match_type(_subarray(source.X, row_indices, src_cols))
             if all(isinstance(x, Integral) and x < 0 for x in src_cols):
-                arr = _subarray(source.metas, row_indices,
-                                [-1 - x for x in src_cols])
+                arr = match_type(_subarray(source.metas, row_indices,
+                                           [-1 - x for x in src_cols]))
                 if arr.dtype != dtype:
                     return arr.astype(dtype)
                 return arr
             if all(isinstance(x, Integral) and x >= n_src_attrs
                    for x in src_cols):
-                return _subarray(source._Y, row_indices,
-                                 [x - n_src_attrs for x in src_cols])
+                return match_type(_subarray(
+                    source._Y, row_indices,
+                    [x - n_src_attrs for x in src_cols]))
 
             if is_sparse:
                 a = sp.dok_matrix((n_rows, len(src_cols)), dtype=dtype)
             else:
                 a = np.empty((n_rows, len(src_cols)), dtype=dtype)
 
-            def match_type(x):
-                """ Assure that matrix and column are both dense or sparse. """
-                if is_sparse == sp.issparse(x):
-                    return x
-                elif is_sparse:
-                    x = np.asarray(x)
-                    return sp.csc_matrix(x.reshape(-1, 1).astype(np.float))
-                else:
-                    return np.ravel(x.toarray())
-
             shared_cache = _conversion_cache
             for i, col in enumerate(src_cols):
                 if col is None:
@@ -316,22 +328,22 @@ def match_type(x):
                                 col.compute_shared(source)
                         shared = shared_cache[id(col.compute_shared), id(source)]
                         if row_indices is not ...:
-                            a[:, i] = match_type(
+                            a[:, i] = match_type_1d(
                                 col(source, shared_data=shared)[row_indices])
                         else:
-                            a[:, i] = match_type(
+                            a[:, i] = match_type_1d(
                                 col(source, shared_data=shared))
                     else:
                         if row_indices is not ...:
-                            a[:, i] = match_type(col(source)[row_indices])
+                            a[:, i] = match_type_1d(col(source)[row_indices])
                         else:
-                            a[:, i] = match_type(col(source))
+                            a[:, i] = match_type_1d(col(source))
                 elif col < 0:
-                    a[:, i] = match_type(source.metas[row_indices, -1 - col])
+                    a[:, i] = match_type_1d(source.metas[row_indices, -1 - col])
                 elif col < n_src_attrs:
-                    a[:, i] = match_type(source.X[row_indices, col])
+                    a[:, i] = match_type_1d(source.X[row_indices, col])
                 else:
-                    a[:, i] = match_type(
+                    a[:, i] = match_type_1d(
                         source._Y[row_indices, col - n_src_attrs])
 
             if is_sparse: