-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvisualize_crop_preview.py
More file actions
402 lines (326 loc) · 14.4 KB
/
visualize_crop_preview.py
File metadata and controls
402 lines (326 loc) · 14.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
"""Visualize crop region preview on horizontal video.
This script shows where the vertical crop will be applied on the original horizontal video.
Red box indicates the crop region, detected objects are shown with colored boxes.
"""
import sys
import cv2
import numpy as np
from pathlib import Path
from typing import Optional
import time
# Add src to path
sys.path.insert(0, str(Path(__file__).parent))
from src.core.soccernet_detector import SoccerNetDetector
from src.core.temporal_filter import TemporalBallFilter
from src.core.roi_calculator import ROICalculator
from src.core.smoother import Smoother, create_smoother
from src.core.scene_detector import SceneDetector
from src.utils.video_utils import VideoReader
from src.utils.config import AppConfig
from src.models.detection_result import Detection, ROI, FrameDetections
def draw_detections(frame: np.ndarray, detections: list[Detection], show_labels: bool = False) -> np.ndarray:
    """Overlay detection bounding boxes on a copy of the frame.

    Balls are drawn as thick bright-cyan rectangles and everything else
    (players) as thin green ones.  Text labels are intentionally omitted
    to keep the preview uncluttered; ``show_labels`` is accepted for API
    compatibility but currently has no effect.

    Args:
        frame: BGR image to annotate.
        detections: Detection objects exposing ``.bbox`` (x, y, width,
            height) and ``.is_ball``.
        show_labels: Reserved; confidence labels are not rendered.

    Returns:
        A new frame with the boxes drawn (the input is left untouched).
    """
    annotated = frame.copy()
    for detection in detections:
        box = detection.bbox
        top_left = (box.x, box.y)
        bottom_right = (box.x + box.width, box.y + box.height)
        if detection.is_ball:
            color, thickness = (255, 255, 0), 3  # bright cyan, highly visible
        else:
            color, thickness = (0, 200, 0), 1  # subtle green for players
        cv2.rectangle(annotated, top_left, bottom_right, color, thickness)
    return annotated
def draw_crop_region(frame: np.ndarray, roi: ROI, crop_width: int, crop_height: int, locked: bool = False) -> np.ndarray:
    """Draw the prospective 9:16 crop window and its center marker.

    The rectangle shows where the vertical crop will land on the
    horizontal source frame; a small crosshair and dot mark the ROI
    center.  The window is clamped to the frame bounds, so near the
    edges the drawn box can be smaller than ``crop_width`` x
    ``crop_height``.

    Args:
        frame: BGR image to annotate.
        roi: ROI object providing ``center_x`` / ``center_y``.
        crop_width: Width of the crop region (9:16 aspect).
        crop_height: Height of the crop region.
        locked: When True the box is drawn yellow (scene locked),
            otherwise red (camera moving).

    Returns:
        A new frame with the crop region drawn.
    """
    annotated = frame.copy()
    height, width = frame.shape[:2]

    cx = int(roi.center_x)
    cy = int(roi.center_y)

    # Clamp the window so it never extends outside the frame.
    left = max(0, cx - crop_width // 2)
    right = min(width, cx + crop_width // 2)
    top = max(0, cy - crop_height // 2)
    bottom = min(height, cy + crop_height // 2)

    if locked:
        color = (0, 200, 255)  # yellow: camera locked in scene
    else:
        color = (0, 0, 255)  # red: camera moving

    cv2.rectangle(annotated, (left, top), (right, bottom), color, 3)

    # Minimal crosshair plus a filled dot at the ROI center.
    arm = 15  # crosshair half-length in pixels
    cv2.line(annotated, (cx - arm, cy), (cx + arm, cy), color, 2)
    cv2.line(annotated, (cx, cy - arm), (cx, cy + arm), color, 2)
    cv2.circle(annotated, (cx, cy), 4, color, -1)

    return annotated
def add_info_overlay(frame: np.ndarray, frame_num: int, total_frames: int,
                     num_balls: int, num_players: int, fps: float) -> np.ndarray:
    """Add an information overlay (frame counter, detections, FPS) to a frame.

    Args:
        frame: Input BGR frame.
        frame_num: Current frame number.
        total_frames: Total number of frames.
        num_balls: Number of balls detected.
        num_players: Number of players detected.
        fps: Processing FPS.

    Returns:
        A new frame with the info overlay drawn (input is left untouched).
    """
    result = frame.copy()

    # Blend a dark rectangle behind the text so it stays readable on any background.
    overlay = result.copy()
    cv2.rectangle(overlay, (10, 10), (400, 120), (0, 0, 0), -1)
    cv2.addWeighted(overlay, 0.6, result, 0.4, 0, result)

    # Render each info line at a fixed vertical spacing.
    lines = [
        f"Frame: {frame_num}/{total_frames}",
        f"Balls: {num_balls} | Players: {num_players}",
        f"Processing FPS: {fps:.1f}",
    ]
    y_offset = 35
    for text in lines:
        cv2.putText(result, text, (20, y_offset),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        y_offset += 25

    return result
def visualize_crop_preview(
    input_path: str,
    output_path: str,
    crop_width: int = 1080,
    crop_height: int = 1920,
    use_temporal_filter: bool = True,
    use_enhanced_smoothing: bool = True,
    use_scene_detection: bool = False
):
    """Create preview video showing crop region with ENHANCED stabilization.

    Renders the source video at its original resolution with the
    prospective 9:16 crop window, object detections, and ROI center
    drawn on every frame.  Processing is three passes: detect, compute
    ROIs (with optional temporal filtering / smoothing), then visualize.

    Args:
        input_path: Path to input horizontal video.
        output_path: Path to output preview video.
        crop_width: Width of vertical crop (default: 1080).
        crop_height: Height of vertical crop (default: 1920).
        use_temporal_filter: Apply temporal filtering to ball trajectory.
        use_enhanced_smoothing: Use enhanced smoothing (hysteresis + scene locking + adaptive EMA).
        use_scene_detection: Use scene detection for ROI reset.

    Raises:
        RuntimeError: If the output video writer cannot be opened.
    """
    print("\n" + "=" * 70)
    print("ENHANCED CROP REGION VISUALIZATION")
    print("=" * 70)
    print(f"Input: {input_path}")
    print(f"Output: {output_path}")
    print(f"Crop size: {crop_width}x{crop_height} (9:16 aspect ratio)")
    print(f"Temporal filter: {use_temporal_filter}")
    print(f"Enhanced smoothing: {use_enhanced_smoothing}")
    print(f"Scene detection: {use_scene_detection}")
    print("=" * 70)

    # Consistent progress numbering: scene detection adds one extra step.
    # (The original mixed "[x/6]", "[4/5]" and hard-coded digits.)
    total_steps = 6 if use_scene_detection else 5
    step = 1

    print(f"\n[{step}/{total_steps}] Initializing enhanced components...")
    config = AppConfig()  # Uses new defaults (YOLOv8m, hysteresis, scene locking, adaptive EMA)
    detector = SoccerNetDetector(config.detection)
    temporal_filter = TemporalBallFilter(window_size=71, outlier_threshold=120) if use_temporal_filter else None
    roi_calculator = ROICalculator(config.roi)  # Includes hysteresis and scene locking
    # Enhanced smoother (adaptive EMA)
    smoother = create_smoother(config.smoother) if use_enhanced_smoothing else None
    # Scene detector (optional)
    scene_detector = SceneDetector(config.scene_detection) if use_scene_detection else None

    scenes = []
    if scene_detector is not None:
        step += 1
        print(f"[{step}/{total_steps}] Detecting scene transitions...")
        scenes = scene_detector.detect_scenes(input_path)

    step += 1
    print(f"[{step}/{total_steps}] Opening video...")
    reader = VideoReader(input_path)
    frame_width = reader.width
    frame_height = reader.height
    fps = reader.fps
    total_frames = reader.frame_count
    print(f" Resolution: {frame_width}x{frame_height}")
    print(f" FPS: {fps}")
    print(f" Total frames: {total_frames}")

    step += 1
    print(f"[{step}/{total_steps}] Creating output video...")
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (frame_width, frame_height))
    if not out.isOpened():
        raise RuntimeError(f"Failed to create output video: {output_path}")

    step += 1
    print(f"[{step}/{total_steps}] Processing frames...")
    start_time = time.time()

    # --- First pass: detect all objects in every frame ---
    print(" First pass: Detecting objects...")
    all_detections = []
    frame_num = 0
    for frame in reader:
        frame_detections = detector.detect_frame(frame, frame_num, frame_num / fps)
        all_detections.append(frame_detections.detections)
        frame_num += 1
        if frame_num % 30 == 0:
            print(f" Processed {frame_num}/{total_frames} frames")
    # Release the first reader before reopening for the third pass (resource leak fix).
    reader.close()

    # Container metadata (frame_count) can disagree with the frames actually
    # decoded; index by what was really read to avoid IndexError.
    n_frames = len(all_detections)

    # --- Temporal filtering of the ball trajectory (optional) ---
    if temporal_filter:
        print(" Applying temporal filter to ball trajectory...")
        # frame_number -> [ball detections] for frames that had a ball
        ball_detections_dict = {}
        for frame_idx, detections in enumerate(all_detections):
            balls = [d for d in detections if d.is_ball]
            if balls:
                ball_detections_dict[frame_idx] = balls
        ball_trajectory = temporal_filter.filter_trajectory(ball_detections_dict, n_frames)
        # Merge the filtered ball back with the player detections.
        for frame_idx in range(n_frames):
            # Drop the raw ball detections for this frame.
            all_detections[frame_idx] = [d for d in all_detections[frame_idx] if not d.is_ball]
            ball_pos = ball_trajectory.get_position(frame_idx)
            if ball_pos and ball_trajectory.is_detected(frame_idx):
                # Keep the original detection object for frames that had one.
                if ball_detections_dict.get(frame_idx):
                    all_detections[frame_idx].append(ball_detections_dict[frame_idx][0])

    # --- Second pass: ROI calculation with hysteresis / scene locking ---
    print(" Second pass: Calculating ROI with enhanced stabilization...")
    all_rois = []
    last_scene_id = -1
    for frame_idx in range(n_frames):
        detections = all_detections[frame_idx]

        # Reset scene locking whenever a new scene starts.
        if scenes and scene_detector:
            current_scene = scene_detector.get_scene_for_frame(frame_idx, scenes)
            if current_scene != last_scene_id and current_scene != -1:
                print(f" Scene change detected at frame {frame_idx}")
                roi_calculator.reset_lock()
                last_scene_id = current_scene

        ball_pos = None
        players = []
        for det in detections:
            if det.is_ball:
                ball_pos = (det.bbox.center_x, det.bbox.center_y)
            else:
                players.append(det)

        roi = roi_calculator.calculate_roi_with_stabilization(
            ball_pos, players, frame_width, frame_height, frame_num=frame_idx
        )
        all_rois.append(roi)
        if (frame_idx + 1) % 100 == 0:
            print(f" Calculated ROI for {frame_idx + 1}/{n_frames} frames")

    # Adaptive-EMA smoothing of the ROI trajectory (optional).
    if smoother:
        print(f" Applying enhanced smoothing ({config.smoother.method}) to ROI trajectory...")
        all_rois = smoother.smooth_trajectory(all_rois)

    # --- Third pass: draw detections + crop window on every frame ---
    print(" Third pass: Visualizing crop regions...")
    reader = VideoReader(input_path)  # Reset reader
    frame_num = 0
    for frame in reader:
        if frame_num >= n_frames:
            break  # decoder yielded more frames on the second read
        detections = all_detections[frame_num]
        roi = all_rois[frame_num]
        # Locked scenes are drawn yellow for easy visual inspection.
        is_locked = (
            roi_calculator.frames_in_lock >= config.roi.lock_min_frames
            if config.roi.use_scene_locking else False
        )
        vis_frame = draw_detections(frame, detections, show_labels=False)
        vis_frame = draw_crop_region(vis_frame, roi, crop_width, crop_height, locked=is_locked)
        out.write(vis_frame)
        frame_num += 1
        if frame_num % 30 == 0:
            print(f" Visualized {frame_num}/{total_frames} frames")

    step += 1
    print(f"[{step}/{total_steps}] Finalizing...")
    out.release()
    reader.close()

    elapsed = time.time() - start_time
    print("\n" + "=" * 70)
    print("✓ VISUALIZATION COMPLETE!")
    print("=" * 70)
    print(f"Output saved to: {output_path}")
    print(f"Processing time: {elapsed:.2f}s")
    if elapsed > 0:  # guard against ZeroDivisionError on trivially short runs
        print(f"Average FPS: {n_frames / elapsed:.1f}")
    print("=" * 70)
if __name__ == "__main__":
    script = Path(__file__).name

    if len(sys.argv) < 2:
        # No input supplied: print usage, legend, and exit non-zero.
        banner = "=" * 70
        print("\n" + banner)
        print("ENHANCED CROP REGION VISUALIZATION")
        print(banner)
        print(f" python {script} <input_video> [output_video]")
        print("\nExample:")
        print(f" python {script} input/goal_clip.mp4")
        print(f" python {script} input/goal_clip.mp4 output/preview.mp4")
        print("\nVisualization Legend:")
        for line in (
            " - RED box: Crop region (camera moving)",
            " - YELLOW box: Crop region (camera LOCKED in scene)",
            " - Cyan box: Detected ball (thick border)",
            " - Green box: Detected players (thin border)",
            " - Crosshair: ROI center point",
        ):
            print(line)
        print("\nEnhancements Applied:")
        for line in (
            " ✓ YOLOv8m detection (better accuracy)",
            " ✓ Hysteresis dead zone (reduces jitter)",
            " ✓ Scene-coherent locking (stable scenes)",
            " ✓ Adaptive EMA smoothing (activity-aware)",
        ):
            print(line)
        print(banner)
        sys.exit(1)

    input_video = sys.argv[1]
    if not Path(input_video).exists():
        print(f"\n✗ Error: Input video not found: {input_video}")
        sys.exit(1)

    # Output path defaults to output/<input-stem>_crop_preview.mp4.
    if len(sys.argv) >= 3:
        output_video = sys.argv[2]
    else:
        output_video = f"output/{Path(input_video).stem}_crop_preview.mp4"

    try:
        visualize_crop_preview(
            input_path=input_video,
            output_path=output_video,
            crop_width=1080,
            crop_height=1920,
            use_temporal_filter=True,
            use_enhanced_smoothing=True,  # hysteresis + scene locking + adaptive EMA
            use_scene_detection=False,  # set True to enable scene detection (slower)
        )
    except Exception as e:
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)