12 | 12 | from pathlib import Path |
13 | 13 | from typing import Dict, List, Optional, Union, Any |
14 | 14 | from datetime import datetime, timedelta |
| 15 | +import aiohttp |
| 16 | +from pydantic import HttpUrl |
15 | 17 |
16 | 18 | # Import models from the models module |
17 | 19 | from src.models.subtitles import ( |
@@ -159,26 +161,6 @@ def get_color_code_for_ass(color_name: str) -> str: |
159 | 161 | return color_map.get(color_name, "FFFFFF") # Default to white if color not found |
160 | 162 |
161 | 163 |
162 | | -def load_transcript(transcript_data: Union[Dict[str, Any], str, Path]) -> Transcript: |
163 | | - """ |
164 | | - Load a transcript file or dictionary and return a validated Transcript model. |
165 | | -
166 | | - Args: |
167 | | - transcript_data: Either a transcript data dictionary or path to JSON file |
168 | | -
169 | | - Returns: |
170 | | - Transcript object |
171 | | - """ |
172 | | - # Load transcript if a path was provided |
173 | | - if isinstance(transcript_data, (str, Path)): |
174 | | - with open(transcript_data, "r", encoding="utf-8") as f: |
175 | | - data = json.load(f) |
176 | | - else: |
177 | | - data = transcript_data |
178 | | - |
179 | | - return Transcript.model_validate(data) |
180 | | - |
181 | | - |
182 | 164 | def chunk_transcript( |
183 | 165 | transcript: Transcript, |
184 | 166 | max_duration: float = 5.0, |
@@ -207,7 +189,6 @@ def chunk_transcript( |
207 | 189 | """ |
208 | 190 | chunks = [] |
209 | 191 | current_chunk = {"text": "", "start": 0, "end": 0, "speaker": "", "words": []} |
210 | | - |
211 | 192 | for segment in transcript.segments: |
212 | 193 | # Skip very short segments |
213 | 194 | if segment.end - segment.start < 0.1: |
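With the `load_transcript` helper removed in this hunk, callers are now expected to load and validate transcript data themselves before invoking the chunking and track-building functions. A minimal sketch of what a call site might do, reusing the same `json.load` / `Transcript.model_validate` steps the deleted helper performed (the file name here is hypothetical):

```python
import json
from pathlib import Path

from src.models.subtitles import Transcript

# Load raw transcript JSON and validate it into a Transcript model,
# mirroring what the removed load_transcript helper used to do.
transcript_path = Path("episode_001.transcript.json")  # hypothetical path
with transcript_path.open("r", encoding="utf-8") as f:
    data = json.load(f)

transcript = Transcript.model_validate(data)
```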
@@ -437,8 +418,8 @@ def add_speaker_prefixes( |
437 | 418 | return chunks |
438 | 419 |
439 | 420 |
440 | | -def create_track( |
441 | | - transcript_data: Union[Dict[str, Any], str, Path], |
| 421 | +def create_subtitles( |
| 422 | + transcript: Transcript, |
442 | 423 | format: str = "srt", |
443 | 424 | max_duration: float = 5.0, |
444 | 425 | max_length: int = 80, |
@@ -473,15 +454,6 @@ def create_track( |
473 | 454 | """ |
474 | 455 | # Normalize track format to TrackFormat enum internally |
475 | 456 | track_format = TrackFormat(format.lower()) |
476 | | - |
477 | | - # Load and validate transcript |
478 | | - transcript = load_transcript(transcript_data) |
479 | | - |
480 | | - # Generate source file information if transcript_data is a path |
481 | | - source_file = None |
482 | | - if isinstance(transcript_data, (str, Path)): |
483 | | - source_file = str(transcript_data) |
484 | | - |
485 | 457 | # Chunk the transcript |
486 | 458 | chunks = chunk_transcript( |
487 | 459 | transcript, |
@@ -534,7 +506,6 @@ def create_track( |
534 | 506 | speakers=speakers, |
535 | 507 | word_count=word_count, |
536 | 508 | duration=track_duration, |
537 | | - source_file=source_file, |
538 | 509 | style=AssStyle( |
539 | 510 | font_name=font_name, |
540 | 511 | font_size=font_size, |
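For reference, the renamed entry point now takes an already-validated `Transcript` instead of a path or dict, and no longer records a `source_file`. A rough sketch of a call site under the new signature, assuming a `transcript` loaded as in the snippet above; the import path is an assumption, since this change does not show which module defines the function:

```python
# Hypothetical import path; the diff does not show where create_subtitles lives.
from src.subtitles import create_subtitles

# Build an SRT track from a pre-validated Transcript; the keyword values simply
# echo the defaults visible in the diff (format="srt", max_duration=5.0, max_length=80).
track = create_subtitles(
    transcript,
    format="srt",
    max_duration=5.0,
    max_length=80,
)
```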