@@ -1642,8 +1642,9 @@ async def generate_video_reel(
16421642 Generates a complete video reel from a high-level request.
16431643 Orchestrates:
16441644 1. Scene breakdown (if not provided)
1645- 2. Parallel generation of Music and Scenes (Image -> Video, Audio)
1646- 3. Assembly using MoviePy
1645+ 2. Apply user-provided speech texts to scenes (if provided; otherwise no narration)
1646+ 3. Parallel generation of Music and Scenes (Image -> Video, Audio)
1647+ 4. Assembly using MoviePy
16471648 """
16481649 self .logger .info (f"Starting Video Reel Generation: { request .prompt } " )
16491650 start_time = time .time ()
@@ -1659,7 +1660,21 @@ async def generate_video_reel(
16591660 self .logger .info ("Breaking down prompt into scenes..." )
16601661 request .scenes = await self ._breakdown_prompt_to_scenes (request .prompt )
16611662
1662- # 2. Parallel Generation
1663+ # 2. Apply user-provided speech texts to scenes (if provided)
1664+ # This overrides any narration_text that might exist in scenes
1665+ if request .speech :
1666+ for i , scene in enumerate (request .scenes ):
1667+ if i < len (request .speech ):
1668+ scene .narration_text = request .speech [i ]
1669+ else :
1670+ # No speech provided for this scene
1671+ scene .narration_text = None
1672+ else :
1673+ # No speech provided at all - clear all narration
1674+ for scene in request .scenes :
1675+ scene .narration_text = None
1676+
1677+ # 3. Parallel Generation
16631678 # Task 1: Music
16641679 music_task = asyncio .create_task (
16651680 self ._generate_reel_music (request , output_directory )
@@ -1685,7 +1700,7 @@ async def generate_video_reel(
16851700 if not valid_scene_outputs :
16861701 raise RuntimeError ("All scene generations failed." )
16871702
1688- # 3 . Assembly
1703+ # 4. Assembly
16891704 final_video_path = await self ._create_reel_assembly (
16901705 valid_scene_outputs ,
16911706 music_path ,
@@ -1718,9 +1733,10 @@ async def _breakdown_prompt_to_scenes(self, prompt: str) -> List[VideoReelScene]
17181733 - `background_prompt`: Detailed visual description for the background image.
17191734 - `foreground_prompt`: (Optional) Text describing a chart, KPI, or specific object to overlay. If not needed, omit.
17201735 - `video_prompt`: Instructions for animating the scene (e.g., "Slow pan up", "Cinematic zoom").
1721- - `narration_text`: (Optional) A short sentence for the narrator to read.
17221736 - `duration`: Duration in seconds (usually 3-5s).
17231737
1738+ Note: Do NOT generate narration text. Narration/speech is provided separately by the user.
1739+
17241740 Return the result as a JSON array of objects matching this schema.
17251741 """
17261742
@@ -1733,7 +1749,6 @@ async def _breakdown_prompt_to_scenes(self, prompt: str) -> List[VideoReelScene]
17331749 "background_prompt" : {"type" : "string" },
17341750 "foreground_prompt" : {"type" : "string" },
17351751 "video_prompt" : {"type" : "string" },
1736- "narration_text" : {"type" : "string" },
17371752 "duration" : {"type" : "number" }
17381753 },
17391754 "required" : ["background_prompt" , "video_prompt" , "duration" ]
0 commit comments