Commit ce28eeb

feat: overlay command

1 parent f4f48d7

11 files changed, +325 -76 lines

CHANGELOG.md (18 additions, 0 deletions)

@@ -0,0 +1,18 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.1.0] - 2025-10-14
+
+### Added
+
+- New `overlay` command to render a video of the accumulated tracked poses.
+
+## [1.0.0]
+
+### Added
+
+- Initial release

Makefile (11 additions, 0 deletions)

@@ -0,0 +1,11 @@
+smoke-test:
+	echo "smoke testing track feature"
+	uv run choreopath track examples/fall-recovery-4.mp4 test-data/test.csv
+	echo "smoke testing draw feature"
+	uv run choreopath draw test-data/test.csv test-data/test.svg
+	echo "smoke testing analyze feature"
+	uv run choreopath analyze test-data/test.csv
+	echo "smoke testing overlay feature"
+	uv run choreopath overlay examples/fall-recovery-4.mp4 test-data/overlay.mp4
+	echo "smoke testing overlay --paths-only feature"
+	uv run choreopath overlay --paths-only examples/fall-recovery-4.mp4 test-data/overlay-paths-only.mp4

README.md (18 additions, 2 deletions)

@@ -48,6 +48,12 @@ choreopath draw tracking_data.csv output.svg
 choreopath draw tracking_data.csv output.svg --width 1920 --height 1080 --min-visibility 0.7
 ```
 
+### Generate an overlay video
+
+```bash
+choreopath overlay video.mp4 video-overlay.mp4
+```
+
 ### Analyze tracking data
 
 ```bash
@@ -62,9 +68,19 @@ generated videos of dancers to create SVG that I could then plot using a pen plo
 See example files:
 
 [original video](examples/fall-recovery-4.mp4)
+[overlay video](examples/fall-recovery-4-overlay.mp4)
 [animated tracking data](examples/fall-recovery-4-animation.mp4)
-![examples/fall-recovery-4.svg](https://github.com/marcw/choreopath/blob/main/examples/fall-recovery-4.svg)
+[examples/fall-recovery-4.svg](https://github.com/marcw/choreopath/blob/main/examples/fall-recovery-4.svg)
+
+## Development
+
+- Use `uv`
+- Run the smoke tests with `make smoke-test`
+
+## Changelog
+
+See [CHANGELOG.md](CHANGELOG.md).
 
 ## License
 
-This software is under a MIT license. Please see [LICENSE.md](LICENSE.md)
+This software is under an MIT license. See [LICENSE.md](LICENSE.md).
Binary file changed (934 KB), not shown.

pyproject.toml (1 addition, 1 deletion)

@@ -1,6 +1,6 @@
 [project]
 name = "choreopath"
-version = "1.0.1"
+version = "1.1.0"
 description = "Transform human movement into generative art. Track body poses from video and create SVG visualizations of motion trajectories"
 readme = "README.md"
 requires-python = ">=3.12,<3.13"

src/choreopath/cli.py (34 additions, 2 deletions)

@@ -4,6 +4,7 @@
 from .video import Video
 from .svg_generator import SVGGenerator
 from .tracking_data import TrackingData
+from .video_overlay_renderer import VideoOverlayRenderer
 
 @click.group()
 def cli():
@@ -15,13 +16,13 @@ def cli():
 @click.option("--min-detection-confidence", type=float, default=0.5)
 @click.option("--min-tracking-confidence", type=float, default=0.5)
 def track(src, dst, min_detection_confidence, min_tracking_confidence):
-    video = Video(src)
+    video = Video(src, min_detection_confidence, min_tracking_confidence)
     click.echo(f"Tracking poses in {src}")
     click.echo("Found {} frames".format(video.total_frames()))
     click.echo("FPS: {}".format(video.fps()))
     click.echo("Tracking poses with min detection confidence: {} and min tracking confidence: {}".format(min_detection_confidence, min_tracking_confidence))
 
-    tracking_data = video.track_poses(min_detection_confidence, min_tracking_confidence)
+    tracking_data = video.track_poses()
 
     if tracking_data:
         df = pd.DataFrame(tracking_data)
@@ -30,6 +31,8 @@ def track(src, dst, min_detection_confidence, min_tracking_confidence):
         click.echo(f"Total data points: {len(tracking_data)}")
     else:
         click.echo("No tracking data found. Check if there are people visible in the video.")
+
+    video.close()
 
 @cli.command(help='Generate SVG trajectories from body tracking data')
 @click.argument('src', type=click.Path(exists=True, readable=True, dir_okay=False))
@@ -75,3 +78,32 @@ def analyze(src, animation, fps):
 
     click.echo(f"\nGenerating tracking data animation to: {animation}")
     tracking_data.to_animation(animation, fps)
+
+@cli.command(help='Generate video with progressive pose path overlays')
+@click.argument('video', type=click.Path(exists=True, readable=True, dir_okay=False))
+@click.argument('output', type=click.Path(writable=True, dir_okay=False))
+@click.option('--min-detection-confidence', type=float, default=0.5, help='Minimum detection confidence')
+@click.option('--min-tracking-confidence', type=float, default=0.5, help='Minimum tracking confidence')
+@click.option('--min-visibility', type=float, default=0.5, help='Minimum visibility threshold')
+@click.option('--line-thickness', type=int, default=2, help='Path line thickness in pixels')
+@click.option('--no-current-point', is_flag=True, help='Disable current position marker')
+@click.option('--paths-only', is_flag=True, help='Render only paths')
+def overlay(video, output, min_detection_confidence, min_tracking_confidence, min_visibility, line_thickness, no_current_point, paths_only):
+    click.echo(f"Generating video overlay from {video} to {output}")
+    if paths_only:
+        click.echo("Mode: Paths only (black background)")
+
+    video = Video(video, min_detection_confidence, min_tracking_confidence)
+
+    renderer = VideoOverlayRenderer(
+        line_thickness=line_thickness,
+        show_current_point=not no_current_point,
+        min_visibility=min_visibility,
+        paths_only=paths_only,
+    )
+
+    renderer.render_overlay(video=video, output_path=output)
+
+    video.close()
+
+    click.echo(f"Video overlay saved to: {output}")

src/choreopath/colors.py (45 additions, 0 deletions)

@@ -0,0 +1,45 @@
+from typing import Tuple
+
+DEFAULT_PALETTE = {
+    'face': '#f87171',
+    'left_arm': '#fb923c',
+    'right_arm': '#facc15',
+    'hips': '#71717a',
+    'left_leg': '#06b6d4',
+    'right_leg': '#3b82f6'
+}
+
+class Palette:
+    def __init__(self):
+        self.body_colors = DEFAULT_PALETTE
+
+    def get_body_region_color(self, body_region: str) -> str:
+        """Get color for a specific body region."""
+        return self.body_colors[body_region]
+
+    def get_landmark_color(self, landmark_id: int) -> str:
+        """Get color for a specific landmark based on body region."""
+        if landmark_id <= 10:
+            return self.body_colors['face']
+        elif landmark_id in [11, 13, 15, 17, 19, 21]:
+            return self.body_colors['left_arm']
+        elif landmark_id in [12, 14, 16, 18, 20, 22]:
+            return self.body_colors['right_arm']
+        elif landmark_id in [23, 24]:
+            return self.body_colors['hips']
+        elif landmark_id in [25, 27, 29, 31]:
+            return self.body_colors['left_leg']
+        elif landmark_id in [26, 28, 30, 32]:
+            return self.body_colors['right_leg']
+        else:
+            return '#888888'  # Gray fallback
+
+    def get_landmark_color_bgr(self, landmark_id: int) -> Tuple[int, int, int]:
+        """Get BGR color tuple for landmark based on body region."""
+        return self.hex_to_bgr(self.get_landmark_color(landmark_id))
+
+    def hex_to_bgr(self, hex_color: str) -> Tuple[int, int, int]:
+        """Convert hex color (#RRGGBB) to BGR tuple for OpenCV."""
+        hex_color = hex_color.lstrip('#')
+        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+        return (b, g, r)
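
A quick sanity check of the new class (a usage sketch, not part of the commit): `hex_to_bgr` reverses the channel order because OpenCV expects BGR, and landmark 15 (LEFT_WRIST in MediaPipe's pose model) falls in the left-arm group:

```python
from choreopath.colors import Palette

palette = Palette()

# Landmark 15 maps to the left-arm color...
assert palette.get_landmark_color(15) == '#fb923c'

# ...and '#fb923c' is RGB (251, 146, 60), so the BGR tuple is reversed.
assert palette.hex_to_bgr('#fb923c') == (60, 146, 251)
assert palette.get_landmark_color_bgr(15) == (60, 146, 251)
```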

src/choreopath/svg_generator.py (9 additions, 32 deletions)

@@ -3,6 +3,7 @@
 from typing import Dict, Tuple
 from .tracking_data import TrackingData
 import mediapipe as mp
+from .colors import Palette
 
 class SVGGenerator:
     """Generates SVG trajectories from body tracking data."""
@@ -13,14 +14,7 @@ def __init__(self, width: int = 1280, height: int = 720, show_legend: bool = Tru
         self.show_legend = show_legend
 
         # Color scheme for different body regions
-        self.body_colors = {
-            'face': '#f87171',
-            'left_arm': '#fb923c',
-            'right_arm': '#facc15',
-            'hips': '#71717a',
-            'left_leg': '#06b6d4',
-            'right_leg': '#3b82f6'
-        }
+        self.palette = Palette()
 
         # Hierarchical body structure mapping
         self.body_hierarchy = {
@@ -66,23 +60,6 @@ def __init__(self, width: int = 1280, height: int = 720, show_legend: bool = Tru
             32: ["body", "right leg"]
         }
 
-    def get_landmark_color(self, landmark_id: int) -> str:
-        """Get color for a specific landmark based on body region."""
-        if landmark_id <= 10:
-            return self.body_colors['face']
-        elif landmark_id in [11, 13, 15, 17, 19, 21]:
-            return self.body_colors['left_arm']
-        elif landmark_id in [12, 14, 16, 18, 20, 22]:
-            return self.body_colors['right_arm']
-        elif landmark_id in [23, 24]:
-            return self.body_colors['hips']
-        elif landmark_id in [25, 27, 29, 31]:
-            return self.body_colors['left_leg']
-        elif landmark_id in [26, 28, 30, 32]:
-            return self.body_colors['right_leg']
-        else:
-            return '#888888'  # Gray fallback
-
     def normalize_to_svg_coords(self, x: float, y: float) -> Tuple[float, float]:
         """
         Convert normalized coordinates (0-1) to SVG coordinates.
@@ -187,7 +164,7 @@ def generate(self, tracking_data: TrackingData) -> ET.ElementTree:
                 path_data += f" L {x:.2f} {y:.2f}"
 
             path_elem.set('d', path_data)
-            path_elem.set('stroke', self.get_landmark_color(landmark_id))
+            path_elem.set('stroke', self.palette.get_landmark_color(landmark_id))
             path_elem.set('stroke-width', '1')
             path_elem.set('fill', 'none')
             path_elem.set('opacity', '0.7')
@@ -230,12 +207,12 @@ def add_legend(self, svg_root: ET.Element) -> None:
 
         # Legend entries
         legend_items = [
-            ('Face', self.body_colors['face']),
-            ('Left Arm', self.body_colors['left_arm']),
-            ('Right Arm', self.body_colors['right_arm']),
-            ('Hips', self.body_colors['hips']),
-            ('Left Leg', self.body_colors['left_leg']),
-            ('Right Leg', self.body_colors['right_leg'])
+            ('Face', self.palette.get_body_region_color('face')),
+            ('Left Arm', self.palette.get_body_region_color('left_arm')),
+            ('Right Arm', self.palette.get_body_region_color('right_arm')),
+            ('Hips', self.palette.get_body_region_color('hips')),
+            ('Left Leg', self.palette.get_body_region_color('left_leg')),
+            ('Right Leg', self.palette.get_body_region_color('right_leg'))
         ]
 
         for i, (label, color) in enumerate(legend_items):
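
The net effect of this refactor is that the SVG generator and the new video renderer share one source of truth for colors. A minimal check of the delegation (illustrative, not part of the commit):

```python
from choreopath.colors import Palette
from choreopath.svg_generator import SVGGenerator

gen = SVGGenerator()

# The generator no longer owns a color table; it defers to the shared palette,
# so landmark 0 (a face landmark) resolves to the palette's face color.
assert gen.palette.get_landmark_color(0) == Palette().get_body_region_color('face')
```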

src/choreopath/video.py (65 additions, 38 deletions)

@@ -1,62 +1,89 @@
 import cv2
 import mediapipe as mp
-from typing import List, Dict
+import numpy as np
+from typing import List, Dict, Tuple
 
 class Video:
-    def __init__(self, path: str):
+    def __init__(self, path: str, min_detection_confidence: float = 0.5, min_tracking_confidence: float = 0.5):
         """
         Initialize the video object and open the video file for reading.
         """
         self.path = path
         self.cap = cv2.VideoCapture(self.path)
+        self.min_detection_confidence = min_detection_confidence
+        self.min_tracking_confidence = min_tracking_confidence
         if not self.cap.isOpened():
             raise ValueError(f"Error opening video file: {self.path}")
 
-    def track_poses(self, min_detection_confidence: float = 0.5, min_tracking_confidence: float = 0.5) -> List[Dict]:
+        self.pose = mp.solutions.pose.Pose(min_detection_confidence=self.min_detection_confidence, min_tracking_confidence=self.min_tracking_confidence)
+        self.pose_frame_count = 0
+
+    def next_pose(self) -> Tuple[bool, np.ndarray, List[Dict]]:
+        if not self.cap.isOpened():
+            return False, None, []
+
+        ret, frame = self.cap.read()
+        if not ret:
+            return False, None, []
+
+        frame_tracking_data = []
+        timestamp = self.pose_frame_count / self.fps()
+
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        results = self.pose.process(rgb_frame)
+
+        if results.pose_landmarks:
+            for idx, landmark in enumerate(results.pose_landmarks.landmark):
+                landmark_name = mp.solutions.pose.PoseLandmark(idx).name
+                frame_tracking_data.append({
+                    'frame': self.pose_frame_count,
+                    'timestamp': timestamp,
+                    'landmark_name': landmark_name,
+                    'landmark_id': idx,
+                    'x': landmark.x,
+                    'y': landmark.y,
+                    'z': landmark.z,
+                    'visibility': landmark.visibility
+                })
+
+        self.pose_frame_count += 1
+
+        return True, frame, frame_tracking_data
+
+    def close(self):
+        self.pose.close()
+        self.cap.release()
+
+    def track_poses(self) -> List[Dict]:
         """
         Track poses in the video and return tracking data.
        """
-        pose = mp.solutions.pose.Pose(min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence)
-        _frame_count = 0
-        _fps = self.fps()
-        _total_frames = self.total_frames()
-
        tracking_data = []
 
-        while self.cap.isOpened():
-            ret, frame = self.cap.read()
-            if not ret:
+        while True:
+            ok, frame, frame_tracking_data = self.next_pose()
+            if not ok:
                 break
-
-            # Convert BGR to RGB
-            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-            # Process frame
-            results = pose.process(rgb_frame)
-
-            _timestamp = _frame_count / _fps
-
-            # Extract landmarks
-            if results.pose_landmarks:
-                for idx, landmark in enumerate(results.pose_landmarks.landmark):
-                    landmark_name = mp.solutions.pose.PoseLandmark(idx).name
-                    tracking_data.append({
-                        'frame': _frame_count,
-                        'timestamp': _timestamp,
-                        'landmark_name': landmark_name,
-                        'landmark_id': idx,
-                        'x': landmark.x,
-                        'y': landmark.y,
-                        'z': landmark.z,
-                        'visibility': landmark.visibility
-                    })
-
-            _frame_count += 1
-
-        self.cap.release()
 
+            for frame_data in frame_tracking_data:
+                tracking_data.append(frame_data)
+
         return tracking_data
 
+
+    def width(self) -> int:
+        """
+        Returns the width of the video.
+        """
+        return int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+
+    def height(self) -> int:
+        """
+        Returns the height of the video.
+        """
+        return int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
     def fps(self) -> float:
         """
         Returns the frames per second of the video.
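
The streaming `next_pose` API is what makes a progressive overlay possible: a consumer can draw on each frame as it arrives instead of buffering the whole video. Below is a minimal sketch of such a consumer, assuming OpenCV's `VideoWriter` with the mp4v codec; the drawing logic is illustrative and not the actual `VideoOverlayRenderer`, whose source is in the commit but not shown in this diff view:

```python
import cv2

from choreopath.colors import Palette
from choreopath.video import Video

video = Video("examples/fall-recovery-4.mp4")
palette = Palette()
writer = cv2.VideoWriter(
    "test-data/overlay-sketch.mp4",
    cv2.VideoWriter_fourcc(*"mp4v"),
    video.fps(),
    (video.width(), video.height()),
)

trails = {}  # landmark_id -> list of (x, y) pixel points seen so far
while True:
    ok, frame, landmarks = video.next_pose()
    if not ok:
        break
    # Landmark coordinates are normalized to [0, 1]; scale them to pixels.
    for lm in landmarks:
        if lm["visibility"] < 0.5:
            continue
        point = (int(lm["x"] * video.width()), int(lm["y"] * video.height()))
        trails.setdefault(lm["landmark_id"], []).append(point)
    # Redraw every accumulated trail on top of the current frame.
    for landmark_id, points in trails.items():
        color = palette.get_landmark_color_bgr(landmark_id)
        for a, b in zip(points, points[1:]):
            cv2.line(frame, a, b, color, 2)
    writer.write(frame)

writer.release()
video.close()
```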
