@@ -177,10 +177,9 @@ def scene_action_score(
177177 times : np .ndarray ,
178178 score : np .ndarray ,
179179) -> float :
180- """Return the average action score within the scene.
180+ """Return the total (summed) action score within the scene.
181181
182- This is essentially the "total action per unit time": the higher the
183- average score, the more intense the scene.
182+ Sum all audio-action frame scores that fall inside the scene duration.
184183 """
185184
186185 start_sec = scene [0 ].get_seconds ()
@@ -196,8 +195,8 @@ def scene_action_score(
196195
197196 segment_scores = score [mask ]
198197
199- # Mean value = integral(score)/duration (dt is constant)
200- return float (segment_scores .mean ())
198+ # Total (integral with constant dt) -> sum of frame scores
199+ return float (segment_scores .sum ())
201200
202201
203202def best_action_window_start (
@@ -404,75 +403,102 @@ def get_final_clip(
404403
405404
406405def combine_scenes (scene_list : Sequence [Tuple ], config : ProcessingConfig ) -> List [List ]:
407- """Combine short scenes into larger ones to meet minimum duration."""
408-
409- combined_small_scene = None
410- combined_large_scene = None
411- combined_scene_list : List [List ] = []
412-
413- for i , scene in enumerate (scene_list ):
414- duration = scene [1 ].get_seconds () - scene [0 ].get_seconds ()
415-
416- if (
417- len (scene_list ) > 1
418- and (i == 0 or i == len (scene_list ) - 1 )
419- and duration < config .min_short_length
420- ):
421- continue
406+ """Combine adjacent scenes while preserving content.
407+
408+ Key principles:
409+ - Never drop interior content just because a run is shorter than a mid target.
410+ - Prefer to merge short interior runs into neighbouring runs.
411+ - Only drop too-short runs that are at the very beginning or end (boundaries),
412+ matching the original test expectations.
413+ - For long sequences of short scenes, cap chunks around `max_combined_scene_length`.
414+ """
422415
423- if duration < config .min_short_length :
424- if combined_small_scene is None :
425- combined_small_scene = [scene [0 ], scene [1 ]]
426- else :
427- combined_small_scene [1 ] = scene [1 ]
428- combined_duration = (
429- combined_small_scene [1 ].get_seconds ()
430- - combined_small_scene [0 ].get_seconds ()
431- )
432- if combined_duration >= config .max_combined_scene_length :
433- combined_scene_list .append (combined_small_scene )
434- combined_small_scene = None
435-
436- if combined_large_scene is not None :
437- combined_duration = (
438- combined_large_scene [1 ].get_seconds ()
439- - combined_large_scene [0 ].get_seconds ()
440- )
441- if combined_duration >= config .middle_short_length :
442- combined_scene_list .append (combined_large_scene )
443- combined_large_scene = None
416+ if not scene_list :
417+ return []
418+
419+ def is_small (scene ) -> bool :
420+ return (scene [1 ].get_seconds () - scene [0 ].get_seconds ()) < config .min_short_length
421+
422+ n = len (scene_list )
423+ out : List [List ] = []
424+
425+ # Initialize first run
426+ run_start_idx = 0
427+ run_type_small = is_small (scene_list [0 ])
428+ run_start_time = scene_list [0 ][0 ]
429+ run_end_time = scene_list [0 ][1 ]
430+
431+ for i in range (1 , n ):
432+ current_small = is_small (scene_list [i ])
433+ if current_small == run_type_small :
434+ # Same-type run continues; extend end.
435+ run_end_time = scene_list [i ][1 ]
436+
437+ # If it's a short-scenes run that gets very long, flush it.
438+ if run_type_small :
439+ run_duration = run_end_time .get_seconds () - run_start_time .get_seconds ()
440+ if run_duration > config .max_combined_scene_length :
441+ # Exceeded cap: flush up to the end of the previous scene to avoid overlap
442+ prev_end_time = scene_list [i - 1 ][1 ]
443+ out .append ([run_start_time , prev_end_time ])
444+ # Start a new run from current scene
445+ run_start_idx = i
446+ run_start_time = scene_list [i ][0 ]
447+ run_end_time = scene_list [i ][1 ]
448+ elif run_duration == config .max_combined_scene_length :
449+ is_last_scene = (i == n - 1 )
450+ if is_last_scene :
451+ # At the very end, close at previous boundary so the final tiny tail
452+ # (current scene) remains a boundary run which can be dropped by threshold.
453+ prev_end_time = scene_list [i - 1 ][1 ]
454+ out .append ([run_start_time , prev_end_time ])
455+ run_start_idx = i
456+ run_start_time = scene_list [i ][0 ]
457+ run_end_time = scene_list [i ][1 ]
458+ else :
459+ # Exactly at cap and not the last scene: we can safely include current scene
460+ # to reach the cap precisely.
461+ out .append ([run_start_time , run_end_time ])
462+ # Start new run at the next scene. Its start equals current end.
463+ run_start_idx = i + 1
464+ run_start_time = scene_list [i ][1 ]
465+ run_end_time = scene_list [i ][1 ]
444466 else :
445- if combined_large_scene is None :
446- combined_large_scene = [scene [0 ], scene [1 ]]
467+ # Run ends at i-1; decide how to handle it.
468+ run_end_idx = i - 1
469+ run_duration = run_end_time .get_seconds () - run_start_time .get_seconds ()
470+ is_boundary = (run_start_idx == 0 ) or (run_end_idx == n - 1 )
471+ threshold = config .middle_short_length if is_boundary else config .min_short_length
472+
473+ if run_duration >= threshold :
474+ out .append ([run_start_time , run_end_time ])
475+ # Start a new run at i
476+ run_start_idx = i
477+ run_type_small = current_small
478+ run_start_time = scene_list [i ][0 ]
479+ run_end_time = scene_list [i ][1 ]
447480 else :
448- combined_large_scene [1 ] = scene [1 ]
449-
450- if combined_small_scene is not None :
451- combined_duration = (
452- combined_small_scene [1 ].get_seconds ()
453- - combined_small_scene [0 ].get_seconds ()
454- )
455- if combined_duration >= config .middle_short_length :
456- combined_scene_list .append (combined_small_scene )
457- combined_small_scene = None
458-
459- if combined_small_scene is not None :
460- combined_duration = (
461- combined_small_scene [1 ].get_seconds ()
462- - combined_small_scene [0 ].get_seconds ()
463- )
464- if combined_duration >= config .middle_short_length :
465- combined_scene_list .append (combined_small_scene )
466-
467- if combined_large_scene is not None :
468- combined_duration = (
469- combined_large_scene [1 ].get_seconds ()
470- - combined_large_scene [0 ].get_seconds ()
471- )
472- if combined_duration >= config .middle_short_length :
473- combined_scene_list .append (combined_large_scene )
474-
475- return combined_scene_list
481+ # Too short run.
482+ if is_boundary and run_start_idx == 0 :
483+ # At the very start: drop this head run (keep original behavior)
484+ run_start_idx = i
485+ run_type_small = current_small
486+ run_start_time = scene_list [i ][0 ]
487+ run_end_time = scene_list [i ][1 ]
488+ else :
489+ # Interior: merge with the next run by carrying the start forward.
490+ run_type_small = current_small
491+ run_end_time = scene_list [i ][1 ]
492+ # Note: keep run_start_idx/time unchanged to include previous run.
493+
494+ # Flush the final run (boundary)
495+ final_duration = run_end_time .get_seconds () - run_start_time .get_seconds ()
496+ is_boundary = True # the last run always reaches the end
497+ threshold = config .middle_short_length if is_boundary else config .min_short_length
498+ if final_duration >= threshold :
499+ out .append ([run_start_time , run_end_time ])
500+
501+ return out
476502
477503
478504class _SecondsTime :
@@ -599,9 +625,7 @@ def process_video(video_file: Path, config: ProcessingConfig, output_dir: Path)
599625 if truncated_list :
600626 for i , scene in enumerate (truncated_list ):
601627 duration = math .floor (scene [1 ].get_seconds () - scene [0 ].get_seconds ())
602- short_length = random .randint (
603- config .min_short_length , min (config .max_short_length , duration )
604- )
628+ short_length = min (config .max_short_length , duration )
605629
606630 # Pick the start time that maximizes the cumulative audio action
607631 # within the chosen short_length window for this scene.
0 commit comments