
Commit 99b8918

Support Online Tracking
1 parent 90cae75 commit 99b8918

2 files changed: +140 −41 lines

tracklab/engine/video.py

Lines changed: 46 additions & 16 deletions
@@ -49,16 +49,17 @@ def __init__(
     def track_dataset(self):
         """Run tracking on complete dataset."""
         self.callback("on_dataset_track_start")
+
         self.callback(
             "on_video_loop_start",
-            video_metadata=pd.Series(name=self.video_filename),
+            video_metadata=pd.Series({"name": self.video_filename}),
             video_idx=0,
             index=0,
         )
         detections = self.video_loop()
         self.callback(
             "on_video_loop_end",
-            video_metadata=pd.Series(name=self.video_filename),
+            video_metadata=pd.Series({"name": self.video_filename}),
             video_idx=0,
             detections=detections,
         )
@@ -81,6 +82,15 @@ def video_loop(self):
         # print('in offline.py, model_names: ', model_names)
         frame_idx = -1
         detections = pd.DataFrame()
+
+        # Initialize module callbacks at the start
+        for model_name in model_names:
+            dummy_dataloader = []
+            self.callback("on_module_start", task=model_name, dataloader=dummy_dataloader)
+
+        # Initialize image metadata for the current frame
+        image_metadata = pd.DataFrame()
+
         while video_cap.isOpened():
             frame_idx += 1
             ret, frame = video_cap.read()
@@ -89,10 +99,20 @@
             image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             if not ret:
                 break
-            metadata = pd.Series({"id": frame_idx, "frame": frame_idx,
-                                  "video_id": video_filename}, name=frame_idx)
+
+            # Create base metadata for this frame
+            base_metadata = pd.Series({
+                "id": frame_idx,
+                "frame": frame_idx,
+                "video_id": video_filename
+            }, name=frame_idx)
+
+            # Reset image metadata for this frame with base metadata
+            image_metadata = pd.DataFrame([base_metadata])
+
             self.callback("on_image_loop_start",
-                          image_metadata=metadata, image_idx=frame_idx, index=frame_idx)
+                          image_metadata=base_metadata, image_idx=frame_idx, index=frame_idx)
+
             for model_name in model_names:
                 model = self.models[model_name]
                 if len(detections) > 0:
@@ -102,49 +122,59 @@ def video_loop(self):
                 if model.level == "video":
                     raise "Video-level not supported for online video tracking"
                 elif model.level == "image":
-                    batch = model.preprocess(image=image, detections=dets, metadata=metadata)
+                    batch = model.preprocess(image=image, detections=dets, metadata=image_metadata.iloc[0])
                     batch = type(model).collate_fn([(frame_idx, batch)])
-                    detections = self.default_step(batch, model_name, detections, metadata)
+                    detections, image_metadata = self.default_step(batch, model_name, detections, image_metadata)
                 elif model.level == "detection":
                     for idx, detection in dets.iterrows():
-                        batch = model.preprocess(image=image, detection=detection, metadata=metadata)
+                        batch = model.preprocess(image=image, detection=detection, metadata=image_metadata.iloc[0])
                         batch = type(model).collate_fn([(detection.name, batch)])
-                        detections = self.default_step(batch, model_name, detections, metadata)
+                        detections, image_metadata = self.default_step(batch, model_name, detections, image_metadata)
             self.callback("on_image_loop_end",
-                          image_metadata=metadata, image=image,
+                          image_metadata=image_metadata.iloc[0], image=image,
                           image_idx=frame_idx, detections=detections)

+        # Finalize module callbacks at the end
+        for model_name in model_names:
+            self.callback("on_module_end", task=model_name, detections=detections)
+
         return detections

-    def default_step(self, batch: Any, task: str, detections: pd.DataFrame, metadata, **kwargs):
+    def default_step(self, batch: Any, task: str, detections: pd.DataFrame, image_metadata: pd.DataFrame, **kwargs):
         model = self.models[task]
         self.callback(f"on_module_step_start", task=task, batch=batch)
         idxs, batch = batch
         idxs = idxs.cpu() if isinstance(idxs, torch.Tensor) else idxs
         if model.level == "image":
             log.info(f"step : {idxs}")
-            batch_metadatas = pd.DataFrame([metadata])
             if len(detections) > 0:
                 batch_input_detections = detections.loc[
-                    np.isin(detections.image_id, batch_metadatas.index)
+                    np.isin(detections.image_id, image_metadata.index)
                 ]
             else:
                 batch_input_detections = detections
             batch_detections = self.models[task].process(
                 batch,
                 batch_input_detections,
-                batch_metadatas)
+                image_metadata)
         else:
             batch_detections = detections.loc[idxs]
             batch_detections = self.models[task].process(
                 batch=batch,
                 detections=batch_detections,
-                metadatas=None,
+                metadatas=image_metadata,
                 **kwargs,
             )
+
+        # Handle tuple return values (some modules return (detections, metadatas))
+        if isinstance(batch_detections, tuple):
+            batch_detections, batch_metadatas = batch_detections
+            # Update image metadata with outputs from this module
+            image_metadata = merge_dataframes(image_metadata, batch_metadatas)
+
         detections = merge_dataframes(detections, batch_detections)
         self.callback(
             f"on_module_step_end", task=task, batch=batch, detections=detections
         )
-        return detections
+        return detections, image_metadata
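For context on the contract this file now expects: `default_step` receives the per-frame `image_metadata` DataFrame, forwards it to the module's `process`, and unpacks an optional `(detections, metadatas)` tuple so extra per-frame outputs are merged back through `merge_dataframes` before the next module runs. Below is a minimal sketch of an image-level module that would fit the updated loop; only the `level`, `preprocess`, `collate_fn`, and `process` hooks come from the diff, while the class name and the column values are illustrative assumptions rather than tracklab API.

import pandas as pd

class DummyDetector:
    # Illustrative image-level module; everything except the hook names is an assumption.
    level = "image"

    def preprocess(self, image, detections, metadata):
        # Package whatever the model needs for this frame.
        return {"image": image}

    @classmethod
    def collate_fn(cls, batch):
        # `batch` is a list of (index, preprocessed) pairs, as built in video_loop.
        idxs = [idx for idx, _ in batch]
        samples = [sample for _, sample in batch]
        return idxs, samples

    def process(self, batch, detections, metadatas):
        # One fake detection per frame listed in `metadatas`.
        new_detections = pd.DataFrame({
            "image_id": list(metadatas.index),
            "bbox_ltwh": [[0, 0, 10, 10]] * len(metadatas),
        })
        # Extra per-frame outputs: returning a (detections, metadatas) tuple is
        # what the updated default_step unpacks and merges into image_metadata.
        extra_metadata = pd.DataFrame(
            {"mean_brightness": [127.0] * len(metadatas)}, index=metadatas.index
        )
        return new_detections, extra_metadata

A module that keeps returning a single DataFrame is still handled unchanged, since the tuple branch in `default_step` is guarded by `isinstance(batch_detections, tuple)`.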

tracklab/visualization/visualization_engine.py

Lines changed: 94 additions & 25 deletions
@@ -6,6 +6,8 @@

 import cv2
 import pandas as pd
+import numpy as np
+import platform

 from tracklab.callbacks import Progressbar, Callback
 from tracklab.visualization import Visualizer
@@ -23,6 +25,7 @@ class VisualizationEngine(Callback):
            `draw_detection`.
        save_images: whether to save the visualization as image files (.jpeg)
        save_videos: whether to save the visualization as video files (.mp4)
+        show_online: whether to show online tracking in realtime (only work if the pipeline doesn't involve VideoLevelModule)
        process_n_videos: number of videos to visualize. Will visualize the first N videos.
        process_n_frames_by_video: number of frames per video to visualize. Will visualize
            frames every N/n frames (not first n frames)
@@ -32,6 +35,7 @@ def __init__(self,
                 visualizers: Dict[str, Visualizer],
                 save_images: bool = False,
                 save_videos: bool = False,
+                 show_online: bool = False,
                 video_fps: int = 25,
                 process_n_videos: Optional[int] = None,
                 process_n_frames_by_video: Optional[int] = None,
@@ -41,13 +45,17 @@
         self.save_dir = Path("visualization")
         self.save_images = save_images
         self.save_videos = save_videos
+        self.show_online = show_online
         self.video_fps = video_fps
         self.max_videos = process_n_videos
         self.max_frames = process_n_frames_by_video
+        self.windows = []
         for visualizer in visualizers.values():
             visualizer.post_init(**kwargs)

     def on_dataset_track_end(self, engine: "TrackingEngine"):
+        if self.show_online:
+            cv2.destroyAllWindows()
         if self.save_videos or self.save_images:
             log.info(f"Visualization output at : {self.save_dir.absolute()}")

@@ -58,31 +66,92 @@ def on_video_loop_end(self, engine, video_metadata, video_idx, detections,
         self.visualize(engine.tracker_state, video_idx, detections, image_pred, progress)
         progress.on_module_end(None, "vis", None)

-    """
-    #TODO implement the online visualization
-    previous code:
-    if self.cfg.show_online:
-        tracker_state = engine.tracker_state
-        if tracker_state.detections_gt is not None:
-            ground_truths = tracker_state.detections_gt[
-                tracker_state.detections_gt.image_id == image_metadata.name
-            ]
-        else:
-            ground_truths = None
-        if len(detections) == 0:
-            image = image
-        else:
-            detections = detections[detections.image_id == image_metadata.name]
-            image = self.draw_frame(image_metadata,
-                                    detections, ground_truths, "inf", image=image)
-        if platform.system() == "Linux" and self.video_name not in self.windows:
-            self.windows.append(self.video_name)
-            cv2.namedWindow(str(self.video_name),
-                            cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
-            cv2.resizeWindow(str(self.video_name), image.shape[1], image.shape[0])
-        cv2.imshow(str(self.video_name), image)
-        cv2.waitKey(1)
-    """
+    def on_image_loop_end(self, engine, image_metadata, image, image_idx, detections):
+        """
+        Handle real-time display during online video tracking.
+        """
+        if not self.show_online:
+            return
+
+        try:
+            # Filter detections for current frame
+            frame_detections = (
+                detections[detections.image_id == image_metadata.name]
+                if len(detections) > 0
+                else pd.DataFrame()
+            )
+
+            # Get ground truth (usually None for online tracking)
+            ground_truths = pd.DataFrame()
+
+            # Create dummy image metadata for compatibility
+            image_pred = pd.Series(
+                {
+                    "lines": getattr(image_metadata, "lines", {}),
+                    "keypoints": getattr(image_metadata, "keypoints", {}),
+                    "file_path": f"frame_{image_idx:06d}.jpg",  # Dummy path
+                },
+                name=image_metadata.name,
+            )
+
+            image_gt = pd.Series(
+                {
+                    "frame": image_idx,
+                    "nframes": -1,  # Unknown total frames in online mode
+                },
+                name=image_metadata.name,
+            )
+
+            # Draw frame with all visualizers
+            display_image = self.draw_online_frame(
+                image_metadata,
+                image,
+                frame_detections,
+                ground_truths,
+                image_pred,
+                image_gt,
+                nframes=-1,
+            )
+
+            # Display the image
+            video_name = str(engine.video_filename)
+            if platform.system() == "Linux" and video_name not in self.windows:
+                self.windows.append(video_name)
+                cv2.namedWindow(video_name, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
+                cv2.resizeWindow(
+                    video_name, display_image.shape[1], display_image.shape[0]
+                )
+
+            # Convert RGB to BGR for OpenCV display
+            cv2.imshow(video_name, display_image)
+            cv2.waitKey(1)  # Non-blocking wait
+
+        except Exception as e:
+            log.warning(f"Error in online visualization: {e}")
+
+    def draw_online_frame(
+        self,
+        image_metadata,
+        image,
+        detections_pred,
+        detections_gt,
+        image_pred,
+        image_gt,
+        nframes,
+    ):
+        """Draw frame using all configured visualizers."""
+        # Create a copy of the image to avoid modifying the original
+        image = np.copy(image)
+
+        for visualizer in self.visualizers.values():
+            try:
+                visualizer.draw_frame(
+                    image, detections_pred, detections_gt, image_pred, image_gt
+                )
+            except Exception as e:
+                log.warning(f"Visualizer {type(visualizer).__name__} raised error: {e}")
+
+        return final_patch(image)

     def visualize(self, tracker_state: TrackerState, video_id, detections, image_preds, progress=None):
         image_metadatas = tracker_state.image_metadatas[tracker_state.image_metadatas.video_id == video_id]
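With the flag wired through, online display is opted into when the visualization callback is constructed. Below is a minimal sketch using only the constructor signature shown in this diff; the empty `visualizers` dict and the remarks about pipeline wiring are placeholders, since the commit does not show how callbacks are assembled into a TrackingEngine.

from tracklab.visualization.visualization_engine import VisualizationEngine

vis_engine = VisualizationEngine(
    visualizers={},    # normally a Dict[str, Visualizer] built from the pipeline config
    save_videos=True,  # still writes the .mp4 at the end of the video loop
    show_online=True,  # new: open a cv2 window and refresh it on every frame
    video_fps=25,
)

# During online tracking, tracklab/engine/video.py fires "on_image_loop_end" once per
# frame; with show_online=True this callback filters the detections for that frame,
# draws them with every configured visualizer, and displays the result via
# cv2.imshow / cv2.waitKey(1). cv2.destroyAllWindows() runs at on_dataset_track_end.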
