Commit 146bd5a

Merge pull request #242 from KumarLabJax/move-no-prediction
Move no prediction
2 parents: d5c874d + cdb15f5

5 files changed (+64, -66 lines)

src/jabs/classifier/classifier.py

Lines changed: 50 additions & 14 deletions
@@ -490,35 +490,71 @@ def sort_features_to_classify(self, features):
         features_sorted = features[classifier_columns]
         return features_sorted
 
-    def predict(self, features):
-        """predict classes for a given set of features"""
+    def predict(
+        self, features: pd.DataFrame, frame_indexes: np.ndarray | None = None
+    ) -> np.ndarray:
+        """predict classes for a given set of features
+
+        Args:
+            features: DataFrame of feature data to classify
+            frame_indexes: frame indexes to classify (default all)
+
+        Returns:
+            predicted class vector
+        """
         if self._classifier_type in (ClassifierType.XGBOOST, ClassifierType.CATBOOST):
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=FutureWarning)
                 # XGBoost and CatBoost can handle NaN, just replace infinities
                 result = self._classifier.predict(
                     self.sort_features_to_classify(features.replace([np.inf, -np.inf], np.nan))
                 )
-            return result
-        # Random Forest can't handle NAs & infs, so fill them with 0s
-        return self._classifier.predict(
-            self.sort_features_to_classify(features.replace([np.inf, -np.inf], 0).fillna(0))
-        )
+        else:
+            # Random forests and gradient boost can't handle NAs & infs, so fill them with 0s
+            result = self._classifier.predict(
+                self.sort_features_to_classify(features.replace([np.inf, -np.inf], 0).fillna(0))
+            )
+
+        # Insert -1s into class prediction when no prediction is made
+        if frame_indexes is not None:
+            result_adjusted = np.full(result.shape, -1, dtype=np.int8)
+            result_adjusted[frame_indexes] = result[frame_indexes]
+            result = result_adjusted
+
+        return result
 
-    def predict_proba(self, features):
-        """predict probabilities for a given set of features"""
+    def predict_proba(
+        self, features: pd.DataFrame, frame_indexes: np.ndarray | None = None
+    ) -> np.ndarray:
+        """predict probabilities for a given set of features.
+
+        Args:
+            features: DataFrame of feature data to classify
+            frame_indexes: frame indexes to classify (default all)
+
+        Returns:
+            prediction probability matrix
+        """
         if self._classifier_type in (ClassifierType.XGBOOST, ClassifierType.CATBOOST):
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore", category=FutureWarning)
                 # XGBoost and CatBoost can handle NaN, just replace infinities
                 result = self._classifier.predict_proba(
                     self.sort_features_to_classify(features.replace([np.inf, -np.inf], np.nan))
                 )
-            return result
-        # Random Forest can't handle NAs & infs, so fill them with 0s
-        return self._classifier.predict_proba(
-            self.sort_features_to_classify(features.replace([np.inf, -np.inf], 0).fillna(0))
-        )
+        else:
+            # Random forests and gradient boost can't handle NAs & infs, so fill them with 0s
+            result = self._classifier.predict_proba(
+                self.sort_features_to_classify(features.replace([np.inf, -np.inf], 0).fillna(0))
+            )
+
+        # Insert 0 probabilities when no prediction is made
+        if frame_indexes is not None:
+            result_adjusted = np.full(result.shape, 0, dtype=np.float32)
+            result_adjusted[frame_indexes] = result[frame_indexes]
+            result = result_adjusted
+
+        return result
 
     def save(self, path: Path):
         """save the classifier to a file

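The new frame_indexes handling in predict()/predict_proba() is a plain NumPy masking pattern: build an output array pre-filled with the "no prediction" sentinel, then copy classifier output only at the valid-pose frames. A minimal standalone sketch of the same idea (array values here are made up for illustration):

    import numpy as np

    # hypothetical classifier output: one predicted class per frame (5 frames)
    result = np.array([1, 0, 1, 1, 0])

    # frames where the animal had a valid pose; frames 2 and 4 did not
    frame_indexes = np.array([0, 1, 3])

    # start from "no prediction" (-1) and copy over only the valid-pose frames
    result_adjusted = np.full(result.shape, -1, dtype=np.int8)
    result_adjusted[frame_indexes] = result[frame_indexes]

    print(result_adjusted)  # [ 1  0 -1  1 -1]

For predict_proba() the same masking is applied with 0.0 as the fill value, so frames without a valid pose carry zero probability for every class.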
src/jabs/project/project.py

Lines changed: 3 additions & 28 deletions
@@ -441,7 +441,6 @@ def save_predictions(
         video_name: str,
         predictions: dict[int, np.ndarray],
         probabilities: dict[int, np.ndarray],
-        frame_indexes: dict[int, np.ndarray],
         behavior: str,
         classifier: object,
     ) -> None:
@@ -452,25 +451,8 @@ def save_predictions(
             video_name: name of the video these predictions correspond to.
             predictions: dict mapping identity to a 1D numpy array of predicted labels.
             probabilities: same structure as `predictions` but with floating-point values.
-            frame_indexes: dict mapping identity to 1D numpy array of absolute frame indices
-                listing the frames where the identity has a valid pose (i.e., frames with a meaningful prediction).
             behavior: string behavior name.
             classifier: Classifier object used to generate the predictions.
-
-        Note:
-            Currently, the classifier runs on every frame for every identity -- even when pose is invalid
-            and features are NaN. We copy values for *only* the frames with a valid pose. This is why we
-            index *both* the source and destination with `indexes` (an array with the absolute frame indices
-            of frames with a valid pose), e.g.:
-
-                prediction_labels[identity, indexes] = predictions[video][identity][indexes]
-                prediction_prob[identity, indexes] = probabilities[video][identity][indexes]
-
-            This leaves the output arrays with default values (-1 for labels, 0.0 for probabilities) for frames
-            without pose.
-
-            In the future, if the upstream caller were to provide compact arrays of length `len(indexes)`
-            instead of full-length arrays, the copy logic would need to drop the indexing on the source side.
         """
         # set up an output filename based on the video names
         file_base = Path(video_name).with_suffix("").name + ".h5"
@@ -482,17 +464,10 @@ def save_predictions(
         )
         prediction_prob = np.zeros_like(prediction_labels, dtype=np.float32)
 
-        # populate numpy arrays
+        # stack the numpy arrays
         for identity in predictions:
-            indexes = frame_indexes[identity]
-
-            # 'indexes' are absolute frame indices where this identity has a valid pose.
-            # predictions[identity] and probabilities[identity] are full-length arrays
-            # (len == num_frames); however, only elements at 'indexes' contain meaningful values.
-            # We index both source and destination with 'indexes' to copy only those valid-pose frames.
-            # If upstream ever provides compact arrays instead, drop the source-side indexing.
-            prediction_labels[identity, indexes] = predictions[identity][indexes]
-            prediction_prob[identity, indexes] = probabilities[identity][indexes]
+            prediction_labels[identity] = predictions[identity]
+            prediction_prob[identity] = probabilities[identity]
 
         # write to h5 file
         self._prediction_manager.write_predictions(

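Because predict()/predict_proba() now return full-length, already-padded arrays, save_predictions() no longer needs frame_indexes and can assign whole rows. A rough sketch of the shape contract this relies on (identity count, frame count, and values below are invented for illustration):

    import numpy as np

    num_identities, num_frames = 2, 5

    # per-identity arrays as the classifier now returns them: full video length,
    # with -1 / 0.0 already filled in for frames that had no valid pose
    predictions = {
        0: np.array([1, 0, -1, 1, -1], dtype=np.int8),
        1: np.array([-1, -1, 0, 1, 1], dtype=np.int8),
    }
    probabilities = {
        0: np.array([0.9, 0.8, 0.0, 0.7, 0.0], dtype=np.float32),
        1: np.array([0.0, 0.0, 0.6, 0.95, 0.9], dtype=np.float32),
    }

    prediction_labels = np.full((num_identities, num_frames), -1, dtype=np.int8)
    prediction_prob = np.zeros_like(prediction_labels, dtype=np.float32)

    # whole-row copy; the "no prediction" padding was already done upstream
    for identity in predictions:
        prediction_labels[identity] = predictions[identity]
        prediction_prob[identity] = probabilities[identity]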
src/jabs/scripts/classify.py

Lines changed: 5 additions & 9 deletions
@@ -137,22 +137,18 @@ def classify_pose(
             data = Classifier.combine_data(per_frame_features, window_features)
 
             if data.shape[0] > 0:
-                pred = classifier.predict(data)
-                pred_prob = classifier.predict_proba(data)
+                pred = classifier.predict(data, features["frame_indexes"])
+                pred_prob = classifier.predict_proba(data, features["frame_indexes"])
 
                 # Keep the probability for the predicted class only.
                 # The following code uses some
                 # numpy magic to use the pred array as column indexes
                 # for each row of the pred_prob array we just computed.
                 pred_prob = pred_prob[np.arange(len(pred_prob)), pred]
 
-                # Only copy out predictions where there was a valid pose
-                prediction_labels[curr_id, features["frame_indexes"]] = pred[
-                    features["frame_indexes"]
-                ]
-                prediction_prob[curr_id, features["frame_indexes"]] = pred_prob[
-                    features["frame_indexes"]
-                ]
+                # Copy results into results matrix
+                prediction_labels[curr_id] = pred
+                prediction_prob[curr_id] = pred_prob
             progress.update(task, advance=1)
 
     print(f"Writing predictions to {out_dir}")

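The "numpy magic" mentioned in the classify.py comments above is fancy indexing that pairs row i with column pred[i], keeping only the probability of the predicted class for each frame. A small sketch with made-up numbers:

    import numpy as np

    # hypothetical predict_proba output: 4 frames x 2 classes
    pred_prob = np.array([
        [0.2, 0.8],
        [0.6, 0.4],
        [0.1, 0.9],
        [0.7, 0.3],
    ])
    pred = np.array([1, 0, 1, 0])  # predicted class per frame

    # row i paired with column pred[i] -> probability of the predicted class
    pred_prob = pred_prob[np.arange(len(pred_prob)), pred]
    print(pred_prob)  # [0.8 0.6 0.9 0.7]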
src/jabs/ui/central_widget.py

Lines changed: 0 additions & 1 deletion
@@ -935,7 +935,6 @@ def _classify_thread_complete(self, output: dict) -> None:
         # display the new predictions
         self._predictions = output["predictions"]
         self._probabilities = output["probabilities"]
-        self._frame_indexes = output["frame_indexes"]
         self._cleanup_progress_dialog()
         self._cleanup_classify_thread()
         self.status_message.emit("Classification Complete", 3000)

src/jabs/ui/classification_thread.py

Lines changed: 6 additions & 14 deletions
@@ -84,7 +84,6 @@ def run(self) -> None:
         self._tasks_complete = 0
         current_video_predictions = {}
         current_video_probabilities = {}
-        current_video_frame_indexes = {}
 
         def check_termination_requested() -> None:
             if self._should_terminate:
@@ -104,7 +103,6 @@ def check_termination_requested() -> None:
                 # collect predictions, probabilities, and frame indexes for each identity in the video
                 predictions = {}
                 probabilities = {}
-                frame_indexes = {}
 
                 for identity in pose_est.identities:
                     check_termination_requested()
@@ -136,31 +134,27 @@ def check_termination_requested() -> None:
                     check_termination_requested()
                     if data.shape[0] > 0:
                         # make predictions
-                        # Note: this makes predictions for all frames in the video, even those without valid pose
-                        # We will later filter these out when saving the predictions to disk
-                        # consider changing this to only predict on frames with valid pose
-                        predictions[identity] = self._classifier.predict(data)
+                        predictions[identity] = self._classifier.predict(
+                            data, feature_values["frame_indexes"]
+                        )
 
                         # also get the probabilities
-                        prob = self._classifier.predict_proba(data)
+                        prob = self._classifier.predict_proba(
+                            data, feature_values["frame_indexes"]
+                        )
                         # Save the probability for the predicted class only.
                         # The following code uses some
                         # numpy magic to use the _predictions array as column indexes
                         # for each row of the 'prob' array we just computed.
                         probabilities[identity] = prob[np.arange(len(prob)), predictions[identity]]
-
-                        # save the indexes for the predicted frames
-                        frame_indexes[identity] = feature_values["frame_indexes"]
                     else:
                         predictions[identity] = np.array(0)
                         probabilities[identity] = np.array(0)
-                        frame_indexes[identity] = np.array(0)
 
                 if video == self._current_video:
                     # keep predictions for the video currently loaded in the video player
                     current_video_predictions = predictions.copy()
                     current_video_probabilities = probabilities.copy()
-                    current_video_frame_indexes = frame_indexes.copy()
 
                 # save predictions to disk
                 self.current_status.emit("Saving Predictions")
@@ -169,7 +163,6 @@ def check_termination_requested() -> None:
                     video,
                     predictions,
                     probabilities,
-                    frame_indexes,
                     self._behavior,
                     self._classifier,
                 )
@@ -183,7 +176,6 @@ def check_termination_requested() -> None:
                 {
                     "predictions": current_video_predictions,
                     "probabilities": current_video_probabilities,
-                    "frame_indexes": current_video_frame_indexes,
                 }
            )
        except Exception as e:

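Dropping frame_indexes from the thread output and the saved files does not lose the valid-frame information: since -1 labels and 0.0 probabilities now mark "no prediction" directly in the arrays, the indexes can be recovered from the data itself when needed. A minimal sketch (the array below is made up):

    import numpy as np

    # one identity's label row: -1 marks frames without a prediction
    labels = np.array([1, 0, -1, 1, -1], dtype=np.int8)

    # recover the frames that actually carry a prediction
    frame_indexes = np.flatnonzero(labels >= 0)
    print(frame_indexes)  # [0 1 3]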