Skip to content

Commit 85efa1b

Browse files
fix energy refinement
1 parent 77fe8e5 commit 85efa1b

File tree

16 files changed

+881
-1003
lines changed

16 files changed

+881
-1003
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,6 @@ print(f"Best cut point: {best_cut.timestamp:.2f}s (score: {best_cut.score:.4f})"
165165
| `energy_threshold_db` | float | 8.0 | All | Minimum dB drop for refinement |
166166
| `energy_lookback_frames` | int | 20 | All | Max frames to search backward |
167167
| `disable_visual_analysis` | bool | False | All | Skip visual ranking, use speech end only |
168-
| `use_silence_cleaning` | bool | False | All | Remove incomplete speech segments |
169-
| `incomplete_threshold` | float | 0.5 | All | Max gap for incomplete segments (seconds) |
170168
| `openai_api_key` | str | None | All | OpenAI API key (or use env var) |
171169
| `airtable_access_token` | str | None | All | Airtable token (or use env var) |
172170
| `airtable_base_id` | str | None | All | Airtable base ID (or use env var) |

examples/basic_usage.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
- Finds the best cut point and saves the video
66
- Saves annotated frames and detailed logs
77
- Works with local video files
8+
- Demonstrates custom configuration options
89
"""
910

1011
import logging
@@ -16,29 +17,43 @@
1617
)
1718

1819

19-
def basic_example():
    """
    Basic example: Using custom energy threshold and sample rate.

    Demonstrates:
    - Custom energy threshold for speech end detection
    - Higher sample rate (1) for more precise frame analysis
    - Full frame annotation and logging
    """
    video_path = "Video_Path"  # NOTE(review): placeholder — point this at a real video file
    output_path = "output/demo.mp4"

    banner = "=" * 60
    print("\nSceneFlow - Advanced Configuration Example")
    print(banner)
    print(f"Finding cut point with custom settings in: {video_path}")
    print(f"Output will be saved to: {output_path}")
    print()

    # Run the full pipeline with energy refinement enabled and a tighter
    # (10 dB) threshold; sample_rate=1 analyzes every frame in the window.
    best_time = cut_video(
        video_path,
        output_path,
        save_frames=True,
        save_logs=True,
        use_energy_refinement=True,
        # use_llm_selection=True,
        # disable_visual_analysis=True,
        energy_threshold_db=10,
        sample_rate=1,
    )

    print()
    print(banner)
    print(f"✓ Cut point found: {best_time:.2f} seconds")
    print(f"✓ Cut video saved to: {output_path}")
    print(banner)
    print("\nCustom settings provide more control over speech detection and frame analysis")


if __name__ == "__main__":
    basic_example()

src/sceneflow/api/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
cut points in videos.
55
"""
66

7-
from sceneflow.api.public import get_cut_frame, get_ranked_cut_frames, cut_video, _upload_to_airtable
7+
from sceneflow.api.public import get_cut_frame, get_ranked_cut_frames, cut_video
88

99
__all__ = [
1010
'get_cut_frame',
1111
'get_ranked_cut_frames',
1212
'cut_video',
13-
'_upload_to_airtable',
1413
]

src/sceneflow/api/_internal.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
"""Internal pipeline functions for SceneFlow API.
2+
3+
This module contains implementation details and should not be imported directly.
4+
Use the public API functions from sceneflow.api instead.
5+
"""
6+
7+
import logging
8+
from typing import Dict, List, Optional, Tuple
9+
10+
from sceneflow.shared.config import RankingConfig
11+
from sceneflow.detection import EnergyRefiner
12+
from sceneflow.shared.models import RankedFrame, FrameScore, FrameFeatures
13+
from sceneflow.core import CutPointRanker
14+
from sceneflow.detection import SpeechDetector
15+
from sceneflow.selection import LLMFrameSelector
16+
from sceneflow.utils.video import get_video_duration
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
def detect_speech_end(
    video_path: str,
    use_energy_refinement: bool,
    energy_threshold_db: float,
    energy_lookback_frames: int,
) -> Tuple[float, float, List[Dict[str, float]]]:
    """Detect when speech ends in the video using VAD, optionally refined by energy analysis.

    Args:
        video_path: Path to the video file to analyze.
        use_energy_refinement: When True, refine the VAD estimate with an
            energy-based backward scan.
        energy_threshold_db: Minimum dB drop the refiner treats as end of speech.
        energy_lookback_frames: Maximum number of frames the refiner searches backward.

    Returns:
        Tuple of ``(speech_end_time, visual_search_end_time, vad_timestamps)``.
        ``visual_search_end_time`` is the original VAD end time when refinement
        moved the speech end earlier (so later visual ranking can still search
        up to the VAD estimate), or -1.0 when no earlier adjustment happened.
    """
    logger.info("Stage 1: Detecting speech end time...")
    detector = SpeechDetector()

    vad_speech_end_time, vad_timestamps = detector.get_speech_timestamps(video_path)
    logger.info("VAD detected speech end at: %.4fs", vad_speech_end_time)

    speech_end_time = vad_speech_end_time

    if use_energy_refinement:
        logger.info("Stage 1.5: Refining VAD-detected speech end time with energy analysis...")

        refiner = EnergyRefiner(
            threshold_db=energy_threshold_db,
            lookback_frames=energy_lookback_frames,
        )
        result = refiner.refine_speech_end(
            speech_end_time,
            video_path
        )
        speech_end_time = result.refined_timestamp

        if result.frames_adjusted > 0:
            logger.info(
                "Energy refinement adjusted timestamp by %d frames",
                result.frames_adjusted
            )
        else:
            logger.info("Energy refinement: No adjustment needed")

    # The original code tracked `before_energy`/`pre_refinement_time`, but both
    # were always equal to vad_speech_end_time — collapsed here to the single
    # comparison they amounted to. If refinement pulled the speech end earlier,
    # keep the VAD time as the visual-search upper bound; -1.0 signals "no bound".
    visual_search_end_time = (
        vad_speech_end_time if speech_end_time < vad_speech_end_time else -1.0
    )

    return speech_end_time, visual_search_end_time, vad_timestamps
65+
66+
67+
def rank_frames(
    video_path: str,
    speech_end_time: float,
    duration: float,
    config: Optional[RankingConfig],
    sample_rate: int,
    visual_search_end_time: float = -1.0,
    return_internals: bool = False
) -> Tuple[List[RankedFrame], Optional[List[FrameFeatures]], Optional[List[FrameScore]]]:
    """Rank candidate cut frames in the window after speech ends.

    The search window runs from ``speech_end_time`` to ``visual_search_end_time``
    when the latter is positive, otherwise to the full video ``duration``.
    Returns ``(ranked_frames, features, scores)``; the last two are None unless
    ``return_internals`` is True.
    """
    # A non-positive visual_search_end_time means "no explicit bound".
    window_end = visual_search_end_time if visual_search_end_time > 0 else duration

    logger.info("Stage 2: Ranking frames based on visual quality...")
    logger.info("Analyzing frames from %.4fs to %.4fs", speech_end_time, window_end)

    ranker = CutPointRanker(config)
    call_kwargs = dict(
        video_path=video_path,
        start_time=speech_end_time,
        end_time=window_end,
        sample_rate=sample_rate,
    )

    if not return_internals:
        return ranker.rank_frames(**call_kwargs), None, None

    ranked, feats, frame_scores = ranker.rank_frames(return_internals=True, **call_kwargs)
    return ranked, feats, frame_scores
101+
102+
103+
def select_best_with_llm(
    video_path: str,
    ranked_frames: List[RankedFrame],
    speech_end_time: float,
    duration: float,
    scores: List[FrameScore],
    features: List[FrameFeatures],
    openai_api_key: Optional[str]
) -> RankedFrame:
    """Use an LLM to select the best frame from the top-ranked candidates.

    Falls back to the top-ranked frame when there is only a single candidate
    or when the LLM call fails for any reason. ``scores`` and ``features`` are
    currently unused but kept for signature compatibility with callers.

    Raises:
        ValueError: If ``ranked_frames`` is empty (previously an opaque IndexError).
    """
    if not ranked_frames:
        raise ValueError("ranked_frames must not be empty")
    if len(ranked_frames) < 2:
        # Nothing to choose between — skip the LLM round-trip entirely.
        return ranked_frames[0]
    try:
        selector = LLMFrameSelector(api_key=openai_api_key)
        return selector.select_best_frame(
            video_path=video_path,
            ranked_frames=ranked_frames,
            speech_end_time=speech_end_time,
            video_duration=duration,
        )
    except Exception as e:
        # Best-effort: LLM selection is an enhancement, never a hard failure.
        logger.warning("LLM selection failed: %s, using top result", e)
        return ranked_frames[0]
126+
127+
128+
def upload_to_airtable(
    video_path: str,
    best_frame: RankedFrame,
    scores: List[FrameScore],
    features: List[FrameFeatures],
    speech_end_time: float,
    duration: float,
    config: Optional[RankingConfig],
    sample_rate: int,
    airtable_access_token: Optional[str],
    airtable_base_id: Optional[str],
    airtable_table_name: Optional[str]
) -> str:
    """Upload analysis results for the chosen frame to Airtable.

    Looks up the score/feature records matching ``best_frame`` by frame index,
    assembles a config summary, and delegates the upload to the integration layer.

    Returns:
        The Airtable record ID of the created row.

    Raises:
        RuntimeError: If the score or feature record for the best frame is missing.
    """
    # Imported lazily so the Airtable integration is only required when uploading.
    from sceneflow.integration import upload_to_airtable as airtable_upload

    best_score = next((s for s in scores if s.frame_index == best_frame.frame_index), None)
    best_features = next((f for f in features if f.frame_index == best_frame.frame_index), None)

    # Compare against None explicitly: the original truthiness test (`not best_score`)
    # would misclassify a falsy-but-present record as missing.
    if best_score is None or best_features is None:
        raise RuntimeError("Could not upload to Airtable - missing data")

    config_dict = {
        "sample_rate": sample_rate,
        "weights": {
            # 0.4/0.6 fallbacks apply when no RankingConfig was supplied —
            # presumably mirroring RankingConfig defaults; verify against that class.
            "eye": config.eye_weight if config else 0.4,
            "mouth": config.mouth_weight if config else 0.6,
        }
    }

    record_id = airtable_upload(
        video_path=video_path,
        best_frame=best_frame,
        frame_score=best_score,
        frame_features=best_features,
        speech_end_time=speech_end_time,
        duration=duration,
        config_dict=config_dict,
        access_token=airtable_access_token,
        base_id=airtable_base_id,
        table_name=airtable_table_name
    )
    return record_id

0 commit comments

Comments
 (0)