5353 >>> # Models: "THUDM/CogVideoX-2b" or "THUDM/CogVideoX-5b"
5454 >>> pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16).to("cuda")
5555 >>> prompt = (
56- ... "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. "
57- ... "The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other "
58- ... "pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, "
59- ... "casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. "
60- ... "The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical "
61- ... "atmosphere of this unique musical performance."
56+ ... "A father and son building a treehouse together, their hands covered in sawdust and smiles on their faces, realistic style."
6257 ... )
63- >>> video = pipe(prompt=prompt, guidance_scale=6, num_inference_steps=50).frames[0]
64- >>> export_to_video(video, "output.mp4", fps=8)
58+
59+ >>> # Configure STG mode options
60+ >>> stg_applied_layers_idx = [11] # Transformer layer indices: 0 to 29 for CogVideoX-2b, 0 to 41 for CogVideoX-5b
61+ >>> stg_scale = 1.0 # Set to 0.0 to disable STG and fall back to plain CFG
62+ >>> do_rescaling = False
63+
64+ >>> # Generate video frames with STG parameters
65+ >>> frames = pipe(
66+ ... prompt=prompt,
67+ ... stg_applied_layers_idx=stg_applied_layers_idx,
68+ ... stg_scale=stg_scale,
69+ ... do_rescaling=do_rescaling,
70+ ... ).frames[0]
71+ >>> export_to_video(frames, "output.mp4", fps=8)
6572 ```
6673"""
6774
@@ -861,4 +868,4 @@ def __call__(
861868 if not return_dict :
862869 return (video ,)
863870
864- return CogVideoXPipelineOutput (frames = video )
871+ return CogVideoXPipelineOutput (frames = video )
0 commit comments