biolab · lanzagar · Jan 6, 2023 · Jan 5, 2023 · Jan 5, 2023 · Jan 5, 2023
diff --git a/Orange/data/table.py b/Orange/data/table.py
@@ -734,13 +734,10 @@ def from_table(cls, domain, source, row_indices=...):
                     table = assure_domain_conversion_sparsity(table, source)
                 return table
 
-            if row_indices is ...:
-                n_rows = len(source)
-            elif isinstance(row_indices, slice):
-                row_indices_range = range(*row_indices.indices(source.X.shape[0]))
-                n_rows = len(row_indices_range)
-            else:
-                n_rows = len(row_indices)
+            # avoid boolean indices; also convert to slices if possible
+            row_indices = _optimize_indices(row_indices, len(source))
+
+            n_rows = _selection_length(row_indices, len(source))
 
             self = cls()
             self.domain = domain
@@ -783,13 +780,8 @@ def from_table(cls, domain, source, row_indices=...):
 
                     while i_done < n_rows:
                         target_indices = slice(i_done, min(n_rows, i_done + PART))
-                        if row_indices is ...:
-                            source_indices = target_indices
-                        elif isinstance(row_indices, slice):
-                            r = row_indices_range[target_indices]
-                            source_indices = slice(r.start, r.stop, r.step)
-                        else:
-                            source_indices = row_indices[target_indices]
+                        source_indices = _select_from_selection(row_indices, target_indices,
+                                                                len(source))
                         part_rows = min(n_rows, i_done+PART) - i_done
 
                         for array_conv in table_conversion.columnwise:
@@ -810,15 +802,9 @@ def from_table(cls, domain, source, row_indices=...):
                         out = cparts if not array_conv.is_sparse else sp.vstack(cparts)
                         setattr(self, array_conv.target, out)
 
-                if source.has_weights():
-                    self.W = source.W[row_indices]
-                else:
-                    self.W = np.empty((n_rows, 0))
+                self.W = source.W[row_indices]
                 self.name = getattr(source, 'name', '')
-                if hasattr(source, 'ids'):
-                    self.ids = source.ids[row_indices]
-                else:
-                    cls._init_ids(self)
+                self.ids = source.ids[row_indices]
                 self.attributes = deepcopy(getattr(source, 'attributes', {}))
                 _idcache_save(_thread_local.conversion_cache, (domain, source), self)
             return self
@@ -876,7 +862,7 @@ def from_table_rows(cls, source, row_indices):
                 self.metas = self.metas.reshape(-1, len(self.domain.metas))
             self.W = source.W[row_indices]
             self.name = getattr(source, 'name', '')
-            self.ids = np.array(source.ids[row_indices])
+            self.ids = source.ids[row_indices]
             self.attributes = deepcopy(getattr(source, 'attributes', {}))
         return self
 
@@ -2421,19 +2407,24 @@ def _subarray(arr, rows, cols):
         # so they need to be reshaped to produce an open mesh
         return arr[np.ix_(rows, cols)]
 
-def _optimize_indices(indices, maxlen):
+
+def _optimize_indices(indices, size):
     """
-    Convert integer indices to slice if possible. It only converts increasing
-    integer ranges with positive steps and valid starts and ends.
-    Only convert valid ends so that invalid ranges will still raise
-    an exception.
+    Convert boolean indices to integer indices and convert these to a slice
+    if possible.
+
+    A slice is created only from indices with positive steps and
+    valid starts and ends (so that invalid ranges will still raise an
+    exception). An IndexError is raised if boolean indices do not conform
+    to input size.
 
     Allows numpy to reuse the data array, because it defaults to copying
     if given indices.
 
     Parameters
     ----------
     indices : 1D sequence, slice or Ellipsis
+    size : int
     """
     if isinstance(indices, slice):
         return indices
@@ -2450,19 +2441,58 @@ def _optimize_indices(indices, maxlen):
 
     if len(indices) >= 1:
         indices = np.asarray(indices)
-        if indices.dtype != bool:
-            begin = indices[0]
-            end = indices[-1]
-            steps = np.diff(indices) if len(indices) > 1 else np.array([1])
-            step = steps[0]
+        if indices.dtype == bool:
+            if len(indices) == size:
+                indices = np.nonzero(indices)[0]
+            else:
+                # raise the same exception numpy would for mismatched boolean indices
+                raise IndexError("boolean indices did not match dimension")
+
+    if len(indices) >= 1:  # conversion from boolean indices could result in an empty array
+        begin = indices[0]
+        end = indices[-1]
+        steps = np.diff(indices) if len(indices) > 1 else np.array([1])
+        step = steps[0]
 
-            # continuous ranges with constant step and valid start and stop index can be slices
-            if np.all(steps == step) and step > 0 and begin >= 0 and end < maxlen:
-                return slice(begin, end + step, step)
+        # continuous ranges with constant step and valid start and stop index can be slices
+        if np.all(steps == step) and step > 0 and begin >= 0 and end < size:
+            return slice(begin, end + step, step)
 
     return indices
 
 
+def _selection_length(indices, maxlen):
+    """ Return the selection length.
+    Args:
+        indices: 1D sequence, slice or Ellipsis
+        maxlen: length of the sequence that indices select from
+    """
+    if indices is ...:
+        return maxlen
+    elif isinstance(indices, slice):
+        return len(range(*indices.indices(maxlen)))
+    else:
+        return len(indices)
+
+
+def _select_from_selection(source_indices, selection_indices, maxlen):
+    """
+    Create efficient selection indices from a previous selection.
+    Try to keep slices as slices.
+    Args:
+        source_indices: 1D sequence, slice or Ellipsis
+        selection_indices: 1D sequence or slice
+        maxlen: length of the sequence that source_indices select from
+    """
+    if source_indices is ...:
+        return selection_indices
+    elif isinstance(source_indices, slice):
+        r = range(*source_indices.indices(maxlen))[selection_indices]
+        return slice(r.start, r.stop, r.step)
+    else:
+        return source_indices[selection_indices]
+
+
 def assure_domain_conversion_sparsity(target, source):
     """
     Assure that the table obeys the domain conversion's suggestions about sparsity.

diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py
@@ -1819,6 +1819,36 @@ def test_creates_table_with_given_domain_and_row_filter(self):
         self.assert_table_with_filter_matches(
             new_table, self.table[:0], xcols=order[:a], ycols=order[a:a+c], mcols=order[a+c:])
 
+    def test_from_table_with_boolean_row_filter(self):
+        a, c, m = column_sizes(self.table)
+        domain = self.table.domain
+
+        sel = [False]*len(self.table)
+        sel[2] = True
+
+        with patch.object(Table, "from_table_rows", wraps=Table.from_table_rows) \
+                as from_table_rows:
+            new_table = Table.from_table(self.table.domain, self.table, row_indices=sel)
+            from_table_rows.assert_called()
+            self.assert_table_with_filter_matches(
+                new_table, self.table[2:3])
+
+        new_domain1 = Domain(domain.attributes[:1], domain.class_vars[:1], domain.metas[:1])
+        with patch.object(Table, "from_table_rows", wraps=Table.from_table_rows) \
+                as from_table_rows:
+            new_table = Table.from_table(new_domain1, self.table, row_indices=sel)
+            from_table_rows.assert_not_called()
+            self.assert_table_with_filter_matches(
+                new_table, self.table[2:3],
+                xcols=[0], ycols=[a], mcols=[a+c+m-1])
+
+        new_domain2 = Domain(domain.attributes[:1] + (ContinuousVariable("new"),),
+                            domain.class_vars[:1], domain.metas[:1])
+        new_table = Table.from_table(new_domain2, self.table, row_indices=sel)
+        self.assert_table_with_filter_matches(
+            new_table.transform(new_domain1), self.table[2:3],
+            xcols=[0], ycols=[a], mcols=[a+c+m-1])
+
     def test_from_table_sparse_move_some_to_empty_metas(self):
         iris = data.Table("iris").to_sparse()
         new_domain = data.domain.Domain(
@@ -2053,7 +2083,6 @@ def test_can_select_a_subset_of_rows_and_columns(self):
                 np.testing.assert_almost_equal(table.metas,
                                                self.table.metas[r, metas])
 
-
     def test_optimize_indices(self):
         # ordinary conversion
         self.assertEqual(_optimize_indices([1, 2, 3], 4), slice(1, 4, 1))
@@ -2064,8 +2093,14 @@ def test_optimize_indices(self):
         np.testing.assert_equal(_optimize_indices([1, 2, 4], 5), [1, 2, 4])
         np.testing.assert_equal(_optimize_indices((1, 2, 4), 5), [1, 2, 4])
 
-        # leave boolean arrays
-        np.testing.assert_equal(_optimize_indices([True, False, True], 3), [True, False, True])
+        # internally convert boolean arrays into indices
+        np.testing.assert_equal(_optimize_indices([False, False, False, False], 4), [])
+        np.testing.assert_equal(_optimize_indices([True, False, True, True], 4), [0, 2, 3])
+        np.testing.assert_equal(_optimize_indices([True, False, True], 3), slice(0, 4, 2))
+        with self.assertRaises(IndexError):
+            _optimize_indices([True, False, True], 2)
+        with self.assertRaises(IndexError):
+            _optimize_indices([True, False, True], 4)
 
         # do not convert if step is negative
         np.testing.assert_equal(_optimize_indices([4, 2, 0], 5), [4, 2, 0])