Update pipeline_stg_cogvideox.py

kinam0252 · web-flow · commit c973b13d48c5 · 2025-03-06T12:53:40.000+09:00
diff --git a/examples/community/pipeline_stg_cogvideox.py b/examples/community/pipeline_stg_cogvideox.py
@@ -53,15 +53,22 @@
         >>> # Models: "THUDM/CogVideoX-2b" or "THUDM/CogVideoX-5b"
         >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16).to("cuda")
         >>> prompt = (
-        ...     "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. "
-        ...     "The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other "
-        ...     "pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, "
-        ...     "casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. "
-        ...     "The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical "
-        ...     "atmosphere of this unique musical performance."
+        ...     "A father and son building a treehouse together, their hands covered in sawdust and smiles on their faces, realistic style."
         ... )
-        >>> video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
-        >>> export_to_video(video, "output.mp4", fps=8)
+        
+        >>> # Configure STG mode options
+        >>> stg_applied_layers_idx = [11]  # Valid layer indices: 0-29 for CogVideoX-2b, 0-41 for CogVideoX-5b
+        >>> stg_scale = 1.0  # Set to 0.0 to use plain classifier-free guidance (CFG) only
+        >>> do_rescaling = False
+        
+        >>> # Generate video frames with STG parameters
+        >>> frames = pipe(
+        ...     prompt=prompt,
+        ...     stg_applied_layers_idx=stg_applied_layers_idx,
+        ...     stg_scale=stg_scale,
+        ...     do_rescaling=do_rescaling,
+        ... ).frames[0]
+        >>> export_to_video(frames, "output.mp4", fps=8)
         ```
 """
 
@@ -861,4 +868,4 @@ def __call__(
         if not return_dict:
             return (video,)
 
-        return CogVideoXPipelineOutput(frames=video)
+        return CogVideoXPipelineOutput(frames=video)