Skip to content

Commit 4e062b4

Browse files
committed
Update scene_action_score to return total instead of average and enhance scene combination logic
- Modify `scene_action_score` to calculate the total (summed) action score rather than the average for better scene intensity measurement.
- Improve `combine_scenes` to handle interior short runs and enforce the max scene length cap with new merging and splitting logic.
- Add tests to validate the updated `combine_scenes` behavior in various scenarios.
1 parent f4bcc9f commit 4e062b4

File tree

2 files changed

+141
-77
lines changed

2 files changed

+141
-77
lines changed

shorts.py

Lines changed: 98 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,9 @@ def scene_action_score(
177177
times: np.ndarray,
178178
score: np.ndarray,
179179
) -> float:
180-
"""Return the average action score within the scene.
180+
"""Return the total (summed) action score within the scene.
181181
182-
This is essentially the "total action per unit time": the higher the
183-
average score, the more intense the scene.
182+
Sum all audio-action frame scores that fall inside the scene duration.
184183
"""
185184

186185
start_sec = scene[0].get_seconds()
@@ -196,8 +195,8 @@ def scene_action_score(
196195

197196
segment_scores = score[mask]
198197

199-
# Mean value = integral(score)/duration (dt is constant)
200-
return float(segment_scores.mean())
198+
# Total (integral with constant dt) -> sum of frame scores
199+
return float(segment_scores.sum())
201200

202201

203202
def best_action_window_start(
@@ -404,75 +403,102 @@ def get_final_clip(
404403

405404

406405
def combine_scenes(scene_list: Sequence[Tuple], config: ProcessingConfig) -> List[List]:
407-
"""Combine short scenes into larger ones to meet minimum duration."""
408-
409-
combined_small_scene = None
410-
combined_large_scene = None
411-
combined_scene_list: List[List] = []
412-
413-
for i, scene in enumerate(scene_list):
414-
duration = scene[1].get_seconds() - scene[0].get_seconds()
415-
416-
if (
417-
len(scene_list) > 1
418-
and (i == 0 or i == len(scene_list) - 1)
419-
and duration < config.min_short_length
420-
):
421-
continue
406+
"""Combine adjacent scenes while preserving content.
407+
408+
Key principles:
409+
- Never drop interior content just because a run is shorter than a mid target.
410+
- Prefer to merge short interior runs into neighbouring runs.
411+
- Only drop too-short runs that are at the very beginning or end (boundaries),
412+
matching the original test expectations.
413+
- For long sequences of short scenes, cap chunks around `max_combined_scene_length`.
414+
"""
422415

423-
if duration < config.min_short_length:
424-
if combined_small_scene is None:
425-
combined_small_scene = [scene[0], scene[1]]
426-
else:
427-
combined_small_scene[1] = scene[1]
428-
combined_duration = (
429-
combined_small_scene[1].get_seconds()
430-
- combined_small_scene[0].get_seconds()
431-
)
432-
if combined_duration >= config.max_combined_scene_length:
433-
combined_scene_list.append(combined_small_scene)
434-
combined_small_scene = None
435-
436-
if combined_large_scene is not None:
437-
combined_duration = (
438-
combined_large_scene[1].get_seconds()
439-
- combined_large_scene[0].get_seconds()
440-
)
441-
if combined_duration >= config.middle_short_length:
442-
combined_scene_list.append(combined_large_scene)
443-
combined_large_scene = None
416+
if not scene_list:
417+
return []
418+
419+
def is_small(scene) -> bool:
420+
return (scene[1].get_seconds() - scene[0].get_seconds()) < config.min_short_length
421+
422+
n = len(scene_list)
423+
out: List[List] = []
424+
425+
# Initialize first run
426+
run_start_idx = 0
427+
run_type_small = is_small(scene_list[0])
428+
run_start_time = scene_list[0][0]
429+
run_end_time = scene_list[0][1]
430+
431+
for i in range(1, n):
432+
current_small = is_small(scene_list[i])
433+
if current_small == run_type_small:
434+
# Same-type run continues; extend end.
435+
run_end_time = scene_list[i][1]
436+
437+
# If it's a short-scenes run that gets very long, flush it.
438+
if run_type_small:
439+
run_duration = run_end_time.get_seconds() - run_start_time.get_seconds()
440+
if run_duration > config.max_combined_scene_length:
441+
# Exceeded cap: flush up to the end of the previous scene to avoid overlap
442+
prev_end_time = scene_list[i - 1][1]
443+
out.append([run_start_time, prev_end_time])
444+
# Start a new run from current scene
445+
run_start_idx = i
446+
run_start_time = scene_list[i][0]
447+
run_end_time = scene_list[i][1]
448+
elif run_duration == config.max_combined_scene_length:
449+
is_last_scene = (i == n - 1)
450+
if is_last_scene:
451+
# At the very end, close at previous boundary so the final tiny tail
452+
# (current scene) remains a boundary run which can be dropped by threshold.
453+
prev_end_time = scene_list[i - 1][1]
454+
out.append([run_start_time, prev_end_time])
455+
run_start_idx = i
456+
run_start_time = scene_list[i][0]
457+
run_end_time = scene_list[i][1]
458+
else:
459+
# Exactly at cap and not the last scene: we can safely include current scene
460+
# to reach the cap precisely.
461+
out.append([run_start_time, run_end_time])
462+
# Start new run at the next scene. Its start equals current end.
463+
run_start_idx = i + 1
464+
run_start_time = scene_list[i][1]
465+
run_end_time = scene_list[i][1]
444466
else:
445-
if combined_large_scene is None:
446-
combined_large_scene = [scene[0], scene[1]]
467+
# Run ends at i-1; decide how to handle it.
468+
run_end_idx = i - 1
469+
run_duration = run_end_time.get_seconds() - run_start_time.get_seconds()
470+
is_boundary = (run_start_idx == 0) or (run_end_idx == n - 1)
471+
threshold = config.middle_short_length if is_boundary else config.min_short_length
472+
473+
if run_duration >= threshold:
474+
out.append([run_start_time, run_end_time])
475+
# Start a new run at i
476+
run_start_idx = i
477+
run_type_small = current_small
478+
run_start_time = scene_list[i][0]
479+
run_end_time = scene_list[i][1]
447480
else:
448-
combined_large_scene[1] = scene[1]
449-
450-
if combined_small_scene is not None:
451-
combined_duration = (
452-
combined_small_scene[1].get_seconds()
453-
- combined_small_scene[0].get_seconds()
454-
)
455-
if combined_duration >= config.middle_short_length:
456-
combined_scene_list.append(combined_small_scene)
457-
combined_small_scene = None
458-
459-
if combined_small_scene is not None:
460-
combined_duration = (
461-
combined_small_scene[1].get_seconds()
462-
- combined_small_scene[0].get_seconds()
463-
)
464-
if combined_duration >= config.middle_short_length:
465-
combined_scene_list.append(combined_small_scene)
466-
467-
if combined_large_scene is not None:
468-
combined_duration = (
469-
combined_large_scene[1].get_seconds()
470-
- combined_large_scene[0].get_seconds()
471-
)
472-
if combined_duration >= config.middle_short_length:
473-
combined_scene_list.append(combined_large_scene)
474-
475-
return combined_scene_list
481+
# Too short run.
482+
if is_boundary and run_start_idx == 0:
483+
# At the very start: drop this head run (keep original behavior)
484+
run_start_idx = i
485+
run_type_small = current_small
486+
run_start_time = scene_list[i][0]
487+
run_end_time = scene_list[i][1]
488+
else:
489+
# Interior: merge with the next run by carrying the start forward.
490+
run_type_small = current_small
491+
run_end_time = scene_list[i][1]
492+
# Note: keep run_start_idx/time unchanged to include previous run.
493+
494+
# Flush the final run (boundary)
495+
final_duration = run_end_time.get_seconds() - run_start_time.get_seconds()
496+
is_boundary = True # the last run always reaches the end
497+
threshold = config.middle_short_length if is_boundary else config.min_short_length
498+
if final_duration >= threshold:
499+
out.append([run_start_time, run_end_time])
500+
501+
return out
476502

477503

478504
class _SecondsTime:
@@ -599,9 +625,7 @@ def process_video(video_file: Path, config: ProcessingConfig, output_dir: Path)
599625
if truncated_list:
600626
for i, scene in enumerate(truncated_list):
601627
duration = math.floor(scene[1].get_seconds() - scene[0].get_seconds())
602-
short_length = random.randint(
603-
config.min_short_length, min(config.max_short_length, duration)
604-
)
628+
short_length = min(config.max_short_length, duration)
605629

606630
# Pick the start time that maximizes the cumulative audio action
607631
# within the chosen short_length window for this scene.

tests/test_shorts.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,12 @@ def test_render_video_raises_after_retries(tmp_path):
113113

114114

115115

116-
def test_scene_action_score_average():
116+
def test_scene_action_score_sum():
117117
times = np.array([0, 1, 2, 3, 4, 5, 6], dtype=float)
118118
score = np.array([0, 10, 10, 10, 0, 0, 0], dtype=float)
119119
scene = make_scene(1.0, 4.0)
120-
avg = scene_action_score(scene, times, score)
121-
assert avg == pytest.approx(10.0, rel=1e-6)
120+
total = scene_action_score(scene, times, score)
121+
assert total == pytest.approx(30.0, rel=1e-6)
122122

123123

124124
def test_scene_action_score_empty_segment():
@@ -213,3 +213,43 @@ def test_best_action_window_start_short_scene():
213213
scene = make_scene(10.0, 12.0) # duration 2s
214214
start = best_action_window_start(scene, 5.0, times, score)
215215
assert start == pytest.approx(10.0, rel=1e-9)
216+
217+
218+
219+
def test_combine_scenes_merges_interior_short_run():
220+
# Interior run of short scenes (< min_short_length) should be merged with neighbours,
221+
# not dropped. Boundary runs use middle_short_length threshold.
222+
config = ProcessingConfig(min_short_length=5, max_short_length=10, max_combined_scene_length=300)
223+
scenes = [
224+
make_scene(0, 8), # long boundary run (>= middle_short_length -> kept)
225+
make_scene(8, 9), # short
226+
make_scene(9, 10), # short (interior run total = 2 < min -> merge)
227+
make_scene(10, 20), # long
228+
]
229+
230+
combined = combine_scenes(scenes, config)
231+
assert len(combined) == 2
232+
(s1, e1), (s2, e2) = combined
233+
assert s1.get_seconds() == 0 and e1.get_seconds() == 8
234+
# The interior short run should be merged into the next long run
235+
assert s2.get_seconds() == 8 and e2.get_seconds() == 20
236+
237+
238+
def test_combine_scenes_splits_long_small_run_by_cap():
239+
# A long sequence of short scenes must be split by max_combined_scene_length, and
240+
# each split lands on an existing scene boundary so consecutive chunks never overlap.
241+
config = ProcessingConfig(min_short_length=5, max_short_length=10, max_combined_scene_length=10)
242+
243+
# 20 consecutive 1-second scenes (all "short") from 0..20
244+
scenes = [make_scene(t, t + 1) for t in range(0, 20)]
245+
246+
combined = combine_scenes(scenes, config)
247+
248+
# Expect two chunks: the first is flushed as soon as the accumulated duration reaches the
249+
# cap exactly, closing at the end of the current scene (end at 10). The second chunk also
250+
# reaches the cap, but on the very last scene, so it closes at the previous boundary (ends
251+
# at 19) and the final 1s tail is dropped as a boundary shorter than middle_short_length.
252+
assert len(combined) == 2
253+
(s1, e1), (s2, e2) = combined
254+
assert s1.get_seconds() == 0 and e1.get_seconds() == 10
255+
assert s2.get_seconds() == 10 and e2.get_seconds() == 19

0 commit comments

Comments
 (0)