Commit e66ccf5

Add filtering by index (#848)
1 parent 15f08d0 commit e66ccf5

File tree

9 files changed (+777, -147 lines)


src/valor_lite/classification/manager.py

Lines changed: 54 additions & 32 deletions
@@ -144,18 +144,18 @@ def missing_prediction_labels(self) -> list[str]:
 
     def create_filter(
         self,
-        datum_ids: list[str] | None = None,
-        labels: list[str] | None = None,
+        datums: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a filter object.
 
         Parameters
         ----------
-        datum_uids : list[str], optional
-            An optional list of string uids representing datums.
-        labels : list[str], optional
-            An optional list of labels.
+        datums : list[str] | NDArray[int32], optional
+            An optional list of string uids or integer indices representing datums.
+        labels : list[str] | NDArray[int32], optional
+            An optional list of strings or integer indices representing labels.
 
         Returns
         -------
@@ -165,50 +165,72 @@ def create_filter(
         # create datum mask
         n_pairs = self._detailed_pairs.shape[0]
         datum_mask = np.ones(n_pairs, dtype=np.bool_)
-        if datum_ids is not None:
-            if not datum_ids:
-                return Filter(
-                    datum_mask=np.zeros_like(datum_mask),
-                    valid_label_indices=None,
-                    metadata=Metadata(),
+        if datums is not None:
+            # convert to array of valid datum indices
+            if isinstance(datums, list):
+                datums = np.array(
+                    [self.datum_id_to_index[uid] for uid in datums],
+                    dtype=np.int32,
                 )
-            valid_datum_indices = np.array(
-                [self.datum_id_to_index[uid] for uid in datum_ids],
-                dtype=np.int32,
-            )
-            datum_mask = np.isin(
-                self._detailed_pairs[:, 0], valid_datum_indices
-            )
+
+            # return early if all data removed
+            if datums.size == 0:
+                raise EmptyFilterError("filter removes all datums")
+
+            # validate indices
+            if datums.max() >= len(self.index_to_datum_id):
+                raise ValueError(
+                    f"datum index '{datums.max()}' exceeds total number of datums"
+                )
+            elif datums.min() < 0:
+                raise ValueError(
+                    f"datum index '{datums.min()}' is a negative value"
+                )
+
+            # create datum mask
+            datum_mask = np.isin(self._detailed_pairs[:, 0], datums)
 
         # collect valid label indices
-        valid_label_indices = None
         if labels is not None:
-            if not labels:
-                return Filter(
-                    datum_mask=datum_mask,
-                    valid_label_indices=np.array([], dtype=np.int32),
-                    metadata=Metadata(),
+            # convert to array of valid label indices
+            if isinstance(labels, list):
+                labels = np.array(
+                    [self.label_to_index[label] for label in labels]
                 )
-            valid_label_indices = np.array(
-                [self.label_to_index[label] for label in labels] + [-1]
-            )
+
+            # return early if all data removed
+            if labels.size == 0:
+                raise EmptyFilterError("filter removes all labels")
+
+            # validate indices
+            if labels.max() >= len(self.index_to_label):
+                raise ValueError(
+                    f"label index '{labels.max()}' exceeds total number of labels"
+                )
+            elif labels.min() < 0:
+                raise ValueError(
+                    f"label index '{labels.min()}' is a negative value"
+                )
+
+            # add -1 to represent null labels which should not be filtered
+            labels = np.concatenate([labels, np.array([-1])])
 
         filtered_detailed_pairs, _ = filter_cache(
             detailed_pairs=self._detailed_pairs,
             datum_mask=datum_mask,
-            valid_label_indices=valid_label_indices,
+            valid_label_indices=labels,
             n_labels=self.metadata.number_of_labels,
         )
 
         number_of_datums = (
-            len(datum_ids)
-            if datum_ids is not None
+            datums.size
+            if datums is not None
             else self.metadata.number_of_datums
         )
 
         return Filter(
             datum_mask=datum_mask,
-            valid_label_indices=valid_label_indices,
+            valid_label_indices=labels,
             metadata=Metadata.create(
                 detailed_pairs=filtered_detailed_pairs,
                 number_of_datums=number_of_datums,
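
Usage sketch (not part of the commit): the calls below assume an already-constructed classification evaluator object named `evaluator`, whose class and setup are outside this diff, so treat that name as a placeholder. They illustrate the new signature, in which `datums` and `labels` accept either string identifiers or int32 index arrays, and invalid selections now raise instead of returning an empty Filter.

    import numpy as np

    # evaluator = ...  # hypothetical, pre-built classification evaluator (construction not shown in this diff)

    # Previous style, with the renamed parameter: string uids and label strings.
    f_by_uid = evaluator.create_filter(
        datums=["uid1", "uid2"],
        labels=["dog", "cat"],
    )

    # New style added by this commit: int32 index arrays.
    f_by_index = evaluator.create_filter(
        datums=np.array([0, 1], dtype=np.int32),
        labels=np.array([0], dtype=np.int32),
    )

    # An empty selection raises EmptyFilterError; a negative index or an index
    # >= the number of datums/labels raises ValueError (see the validation above).
    # A -1 sentinel is appended to `labels` internally so unmatched (null) labels
    # are never filtered out.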

src/valor_lite/object_detection/manager.py

Lines changed: 106 additions & 42 deletions
@@ -173,82 +173,146 @@ def metadata(self) -> Metadata:
 
     def create_filter(
         self,
-        datum_ids: list[str] | None = None,
-        groundtruth_ids: list[str] | None = None,
-        prediction_ids: list[str] | None = None,
-        labels: list[str] | None = None,
+        datums: list[str] | NDArray[np.int32] | None = None,
+        groundtruths: list[str] | NDArray[np.int32] | None = None,
+        predictions: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a filter object.
 
         Parameters
         ----------
-        datum_uids : list[str], optional
-            An optional list of string uids representing datums to keep.
-        groundtruth_ids : list[str], optional
-            An optional list of string uids representing ground truth annotations to keep.
-        prediction_ids : list[str], optional
-            An optional list of string uids representing prediction annotations to keep.
-        labels : list[str], optional
-            An optional list of labels to keep.
+        datums : list[str] | NDArray[int32], optional
+            An optional list of string ids or indices representing datums to keep.
+        groundtruths : list[str] | NDArray[int32], optional
+            An optional list of string ids or indices representing ground truth annotations to keep.
+        predictions : list[str] | NDArray[int32], optional
+            An optional list of string ids or indices representing prediction annotations to keep.
+        labels : list[str] | NDArray[int32], optional
+            An optional list of labels or indices to keep.
         """
         mask_datums = np.ones(self._detailed_pairs.shape[0], dtype=np.bool_)
 
         # filter datums
-        if datum_ids is not None:
-            if not datum_ids:
-                raise EmptyFilterError("filter removes all datums")
-            valid_datum_indices = np.array(
-                [self.datum_id_to_index[uid] for uid in datum_ids],
-                dtype=np.int32,
-            )
-            mask_datums = np.isin(
-                self._detailed_pairs[:, 0], valid_datum_indices
-            )
+        if datums is not None:
+            # convert to indices
+            if isinstance(datums, list):
+                datums = np.array(
+                    [self.datum_id_to_index[uid] for uid in datums],
+                    dtype=np.int32,
+                )
+
+            # validate indices
+            if datums.size == 0:
+                raise EmptyFilterError(
+                    "filter removes all datums"
+                )
+            elif datums.min() < 0:
+                raise ValueError(
+                    f"datum index cannot be negative '{datums.min()}'"
+                )
+            elif datums.max() >= len(self.index_to_datum_id):
+                raise ValueError(
+                    f"datum index cannot exceed total number of datums '{datums.max()}'"
+                )
+
+            # apply to mask
+            mask_datums = np.isin(self._detailed_pairs[:, 0], datums)
 
         filtered_detailed_pairs = self._detailed_pairs[mask_datums]
         n_pairs = self._detailed_pairs[mask_datums].shape[0]
         mask_groundtruths = np.zeros(n_pairs, dtype=np.bool_)
         mask_predictions = np.zeros_like(mask_groundtruths)
 
         # filter by ground truth annotation ids
-        if groundtruth_ids is not None:
-            valid_groundtruth_indices = np.array(
-                [self.groundtruth_id_to_index[uid] for uid in groundtruth_ids],
-                dtype=np.int32,
-            )
+        if groundtruths is not None:
+            # convert to indices
+            if isinstance(groundtruths, list):
+                groundtruths = np.array(
+                    [
+                        self.groundtruth_id_to_index[uid]
+                        for uid in groundtruths
+                    ],
+                    dtype=np.int32,
+                )
+
+            # validate indices
+            if groundtruths.size == 0:
+                warnings.warn("filter removes all ground truths")
+            elif groundtruths.min() < 0:
+                raise ValueError(
+                    f"groundtruth annotation index cannot be negative '{groundtruths.min()}'"
+                )
+            elif groundtruths.max() >= len(self.index_to_groundtruth_id):
+                raise ValueError(
+                    f"groundtruth annotation index cannot exceed total number of groundtruths '{groundtruths.max()}'"
+                )
+
+            # apply to mask
             mask_groundtruths[
                 ~np.isin(
                     filtered_detailed_pairs[:, 1],
-                    valid_groundtruth_indices,
+                    groundtruths,
                 )
             ] = True
 
         # filter by prediction annotation ids
-        if prediction_ids is not None:
-            valid_prediction_indices = np.array(
-                [self.prediction_id_to_index[uid] for uid in prediction_ids],
-                dtype=np.int32,
-            )
+        if predictions is not None:
+            # convert to indices
+            if isinstance(predictions, list):
+                predictions = np.array(
+                    [self.prediction_id_to_index[uid] for uid in predictions],
+                    dtype=np.int32,
+                )
+
+            # validate indices
+            if predictions.size == 0:
+                warnings.warn("filter removes all predictions")
+            elif predictions.min() < 0:
+                raise ValueError(
+                    f"prediction annotation index cannot be negative '{predictions.min()}'"
+                )
+            elif predictions.max() >= len(self.index_to_prediction_id):
+                raise ValueError(
+                    f"prediction annotation index cannot exceed total number of predictions '{predictions.max()}'"
+                )
+
+            # apply to mask
             mask_predictions[
                 ~np.isin(
                     filtered_detailed_pairs[:, 2],
-                    valid_prediction_indices,
+                    predictions,
                 )
             ] = True
 
         # filter by labels
         if labels is not None:
-            if not labels:
+            # convert to indices
+            if isinstance(labels, list):
+                labels = np.array(
+                    [self.label_to_index[label] for label in labels]
+                )
+
+            # validate indices
+            if labels.size == 0:
                 raise EmptyFilterError("filter removes all labels")
-            valid_label_indices = np.array(
-                [self.label_to_index[label] for label in labels] + [-1]
-            )
+            elif labels.min() < 0:
+                raise ValueError(
+                    f"label index cannot be negative '{labels.min()}'"
+                )
+            elif labels.max() >= len(self.index_to_label):
+                raise ValueError(
+                    f"label index cannot exceed total number of labels '{labels.max()}'"
+                )
+
+            # apply to mask
+            labels = np.concatenate([labels, np.array([-1])])  # add null label
             mask_groundtruths[
-                ~np.isin(filtered_detailed_pairs[:, 3], valid_label_indices)
+                ~np.isin(filtered_detailed_pairs[:, 3], labels)
             ] = True
             mask_predictions[
-                ~np.isin(filtered_detailed_pairs[:, 4], valid_label_indices)
+                ~np.isin(filtered_detailed_pairs[:, 4], labels)
             ] = True
 
         filtered_detailed_pairs, _, _ = filter_cache(
@@ -260,8 +324,8 @@ def create_filter(
         )
 
         number_of_datums = (
-            len(datum_ids)
-            if datum_ids
+            datums.size
+            if datums is not None
             else np.unique(filtered_detailed_pairs[:, 0]).size
         )
 
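
Usage sketch (not part of the commit): the calls below assume an already-constructed object-detection evaluator object named `evaluator`, which this diff does not show, so the name and the example ids are placeholders. All four parameters now accept either string ids or int32 index arrays; per the validation above, empty `datums` or `labels` selections raise EmptyFilterError, empty `groundtruths` or `predictions` selections only emit a warning, and negative or out-of-range indices raise ValueError.

    import numpy as np

    # evaluator = ...  # hypothetical, pre-built object-detection evaluator (construction not shown in this diff)

    # Keep two datums and one ground-truth annotation by integer index.
    f_by_index = evaluator.create_filter(
        datums=np.array([0, 1], dtype=np.int32),
        groundtruths=np.array([3], dtype=np.int32),
    )

    # Styles can be mixed: string ids for predictions, an index array for labels.
    f_mixed = evaluator.create_filter(
        predictions=["pred_uid_7"],
        labels=np.array([2, 4], dtype=np.int32),
    )

    # Empty datums/labels selections raise EmptyFilterError, empty groundtruths/predictions
    # selections only warn, and negative or out-of-range indices raise ValueError.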

0 commit comments