feat: Enhance video information retrieval and subtitle rendering

WEIFENG2333 · WEIFENG2333 · commit fdab06b21982 · 2026-01-28T01:23:26.000+08:00
- Replaced `_get_video_resolution` with `_get_video_info`, which returns the video resolution and duration; it still raises `ValueError` when the resolution cannot be parsed, while an unparsed duration defaults to `0.0`.
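- Updated `render_rounded_video` to call `_get_video_info` and pass the returned duration to ffmpeg via `-t`, so the output keeps the original video length instead of being cut off when the overlay ends.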
- Streamlined code formatting for better readability across various functions in `rounded_renderer.py` and `subtitle_thread.py`.
- `SubtitleThread` now takes the task-context file name from the video path (falling back to the subtitle path) and prepends that file path to the subtitle optimization prompt as context.

These changes enhance the functionality and maintainability of subtitle rendering and video processing features.
diff --git a/app/core/subtitle/rounded_renderer.py b/app/core/subtitle/rounded_renderer.py
@@ -23,23 +23,31 @@
 logger = setup_logger("subtitle.rounded")
 
 
-def _get_video_resolution(video_path: str) -> Tuple[int, int]:
-    """获取视频分辨率"""
+def _get_video_info(video_path: str) -> Tuple[int, int, float]:
+    """获取视频分辨率和时长"""
     result = subprocess.run(
         ["ffmpeg", "-i", video_path],
         capture_output=True,
         text=True,
         encoding="utf-8",
         errors="replace",
-        creationflags=(
-            getattr(subprocess, "CREATE_NO_WINDOW", 0) if os.name == "nt" else 0
-        ),
+        creationflags=(getattr(subprocess, "CREATE_NO_WINDOW", 0) if os.name == "nt" else 0),
     )
 
+    # 解析分辨率
+    width, height = 0, 0
     if match := re.search(r"Stream.*Video:.* (\d{2,5})x(\d{2,5})", result.stderr):
-        return int(match.group(1)), int(match.group(2))
+        width, height = int(match.group(1)), int(match.group(2))
+    else:
+        raise ValueError(f"无法获取视频分辨率: {video_path}")
+
+    # 解析时长
+    duration = 0.0
+    if match := re.search(r"Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)", result.stderr):
+        h, m, s = match.groups()
+        duration = int(h) * 3600 + int(m) * 60 + float(s)
 
-    raise ValueError(f"无法获取视频分辨率: {video_path}")
+    return width, height, duration
 
 
 def render_text_block(
@@ -154,9 +162,7 @@ def render_subtitle_image(
         else []
     )
     secondary_lines = (
-        wrap_text(
-            secondary_text, font, width, style.padding_h, extra_margin=extra_margin
-        )
+        wrap_text(secondary_text, font, width, style.padding_h, extra_margin=extra_margin)
         if secondary_text
         else []
     )
@@ -169,11 +175,7 @@ def calc_block_height(lines: List[str]) -> float:
             return 0
         bbox = font.getbbox("测试Ag")
         line_h = bbox[3] - bbox[1]
-        return (
-            line_h * len(lines)
-            + style.line_spacing * (len(lines) - 1)
-            + style.padding_v * 2
-        )
+        return line_h * len(lines) + style.line_spacing * (len(lines) - 1) + style.padding_v * 2
 
     primary_height = calc_block_height(primary_lines)
     secondary_height = calc_block_height(secondary_lines)
@@ -254,15 +256,11 @@ def render_preview(
         )
 
     # 渲染字幕并叠加
-    subtitle_img = render_subtitle_image(
-        primary_text, secondary_text, width, height, style
-    )
+    subtitle_img = render_subtitle_image(primary_text, secondary_text, width, height, style)
     background.paste(subtitle_img, (0, 0), subtitle_img)
 
     # 保存到临时目录
-    with tempfile.NamedTemporaryFile(
-        mode="wb", suffix=".png", delete=False
-    ) as tmp_file:
+    with tempfile.NamedTemporaryFile(mode="wb", suffix=".png", delete=False) as tmp_file:
         background.save(tmp_file, "PNG")
         return tmp_file.name
 
@@ -302,8 +300,7 @@ def render_rounded_video(
     # 检查布局合理性
     if layout == SubtitleLayoutEnum.ONLY_TRANSLATE:
         has_translation = any(
-            seg.translated_text and seg.translated_text.strip()
-            for seg in asr_data.segments
+            seg.translated_text and seg.translated_text.strip() for seg in asr_data.segments
         )
         if not has_translation:
             layout = SubtitleLayoutEnum.ONLY_ORIGINAL
@@ -312,14 +309,13 @@ def render_rounded_video(
         or layout == SubtitleLayoutEnum.ORIGINAL_ON_TOP
     ):
         has_translation = any(
-            seg.translated_text and seg.translated_text.strip()
-            for seg in asr_data.segments
+            seg.translated_text and seg.translated_text.strip() for seg in asr_data.segments
         )
         if not has_translation:
             layout = SubtitleLayoutEnum.ONLY_ORIGINAL
 
     # 获取视频信息
-    width, height = _get_video_resolution(video_path)
+    width, height, video_duration = _get_video_info(video_path)
 
     # 构建并缩放样式
     style_config = rounded_style or {}
@@ -343,9 +339,7 @@ def render_rounded_video(
         temp_path = Path(temp_dir)
 
         # 步骤1: 生成所有字幕PNG (0-30%)
-        logger.info(
-            f"生成字幕PNG图片（共{len(asr_data.segments)}个，布局：{layout.value}）"
-        )
+        logger.info(f"生成字幕PNG图片（共{len(asr_data.segments)}个，布局：{layout.value}）")
         subtitle_frames = []
 
         for i, seg in enumerate(asr_data.segments):
@@ -372,9 +366,7 @@ def render_rounded_video(
             # 进度回调
             if progress_callback:
                 progress = int((i + 1) / len(asr_data.segments) * 30)
-                progress_callback(
-                    progress, f"生成字幕图片 {i + 1}/{len(asr_data.segments)}"
-                )
+                progress_callback(progress, f"生成字幕图片 {i + 1}/{len(asr_data.segments)}")
 
         if not subtitle_frames:
             raise ValueError("没有生成任何有效的字幕图片")
@@ -409,14 +401,12 @@ def render_rounded_video(
             # 判断是否是最后一批
             is_last_batch = batch_idx == total_batches - 1
             batch_output = (
-                output_path
-                if is_last_batch
-                else temp_path / f"batch_{batch_idx:03d}.mp4"
+                output_path if is_last_batch else temp_path / f"batch_{batch_idx:03d}.mp4"
             )
 
-            logger.info(
-                f"处理批次 {batch_idx + 1}/{total_batches}（{len(batch_frames)}个字幕）"
-            )
+            logger.info(f"处理批次 {batch_idx + 1}/{total_batches}（{len(batch_frames)}个字幕）")
+            # 构建 ffmpeg 命令
+            # -t 参数强制保持原视频时长，防止因 overlay 结束而截断视频
             cmd = [
                 "ffmpeg",
                 "-y",
@@ -426,7 +416,9 @@ def render_rounded_video(
                 "-map",
                 final_output,
                 "-map",
-                "0:a?",  # 每一批都需要映射音频流
+                "0:a?",
+                "-t",
+                str(video_duration),  # 强制保持原视频时长
                 "-c:v",
                 "libx264",
                 "-preset",
@@ -436,7 +428,7 @@ def render_rounded_video(
                 "-pix_fmt",
                 "yuv420p",
                 "-c:a",
-                "copy",  # 每一批都复制音频
+                "copy",
                 str(batch_output),
             ]
 
@@ -448,6 +440,8 @@ def render_rounded_video(
                 cmd,
                 capture_output=True,
                 text=True,
+                encoding="utf-8",
+                errors="replace",
                 creationflags=(
                     getattr(subprocess, "CREATE_NO_WINDOW", 0) if os.name == "nt" else 0
                 ),
diff --git a/app/thread/subtitle_thread.py b/app/thread/subtitle_thread.py
@@ -71,10 +71,12 @@ def _setup_llm_config(self) -> Optional[SubtitleConfig]:
 
     def run(self):
         # 设置任务上下文
-        file_name = Path(self.task.subtitle_path).name if self.task.subtitle_path else ""
+        task_file = (
+            Path(self.task.video_path) if self.task.video_path else Path(self.task.subtitle_path)
+        )
         set_task_context(
             task_id=self.task.task_id,
-            file_name=file_name,
+            file_name=task_file.name,
             stage="subtitle",
         )
 
@@ -115,7 +117,8 @@ def run(self):
                 self.update_all.emit(asr_data.to_json())
 
             # 3. 优化字幕
-            custom_prompt = subtitle_config.custom_prompt_text
+            context_info = f'The subtitles below are from a file named "{task_file}". Use this context to improve accuracy if needed.\n'
+            custom_prompt = context_info + (subtitle_config.custom_prompt_text or "") + "\n"
             self.subtitle_length = len(asr_data.segments)
 
             if subtitle_config.need_optimize:
@@ -175,9 +178,7 @@ def run(self):
                         update_callback=self.callback,
                     )
                 elif translator_service == TranslatorServiceEnum.DEEPLX:
-                    os.environ["DEEPLX_ENDPOINT"] = (
-                        subtitle_config.deeplx_endpoint or ""
-                    )
+                    os.environ["DEEPLX_ENDPOINT"] = subtitle_config.deeplx_endpoint or ""
                     translator = DeepLXTranslator(
                         thread_num=subtitle_config.thread_num,
                         batch_num=5,
@@ -212,25 +213,22 @@ def run(self):
             asr_data.save(
                 save_path=self.task.output_path or "",
                 ass_style=subtitle_config.subtitle_style or "",
-                layout=subtitle_config.subtitle_layout
-                or SubtitleLayoutEnum.ONLY_TRANSLATE,
+                layout=subtitle_config.subtitle_layout or SubtitleLayoutEnum.ONLY_TRANSLATE,
             )
             logger.info(f"字幕保存到 {self.task.output_path}")
 
             # 6. 文件移动与清理
             if self.task.need_next_task and self.task.video_path:
                 # 保存srt/ass文件到视频目录（对于全流程任务）
                 save_srt_path = (
-                    Path(self.task.video_path).parent
-                    / f"{Path(self.task.video_path).stem}.srt"
+                    Path(self.task.video_path).parent / f"{Path(self.task.video_path).stem}.srt"
                 )
                 asr_data.to_srt(
                     save_path=str(save_srt_path),
                     layout=subtitle_config.subtitle_layout,
                 )
                 save_ass_path = (
-                    Path(self.task.video_path).parent
-                    / f"{Path(self.task.video_path).stem}.ass"
+                    Path(self.task.video_path).parent / f"{Path(self.task.video_path).stem}.ass"
                 )
                 asr_data.to_ass(
                     save_path=str(save_ass_path),
@@ -267,15 +265,11 @@ def need_llm(self, subtitle_config: SubtitleConfig, asr_data: ASRData):
     def callback(self, result: List[SubtitleProcessData]):
         self.finished_subtitle_length += len(result)
         # 简单计算当前进度（0-100%）
-        progress = min(
-            int((self.finished_subtitle_length / self.subtitle_length) * 100), 100
-        )
+        progress = min(int((self.finished_subtitle_length / self.subtitle_length) * 100), 100)
         self.progress.emit(progress, self.tr("{0}% 处理字幕").format(progress))
         # 转换为字典格式供UI使用
         result_dict = {
-            str(data.index): data.translated_text
-            or data.optimized_text
-            or data.original_text
+            str(data.index): data.translated_text or data.optimized_text or data.original_text
             for data in result
         }
         self.update.emit(result_dict)