Skip to content

Commit c88cb16

Browse files
committed
refactor: remove device_map parameter for model loading and add pipeline.to("cuda") for GPU allocation
1 parent 1e26139 commit c88cb16

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

docs/source/en/api/pipelines/skyreels_v2.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -150,29 +150,27 @@ From the original repo:
150150
import torch
151151
from diffusers import AutoModel, SkyReelsV2DiffusionForcingPipeline, UniPCMultistepScheduler
152152
from diffusers.utils import export_to_video
153-
# For faster loading into the GPU
154-
os.environ["HF_ENABLE_PARALLEL_LOADING"] = "yes"
155153

156154

157155
model_id = "Skywork/SkyReels-V2-DF-1.3B-540P-Diffusers"
158-
vae = AutoModel.from_pretrained(model_id,
159-
subfolder="vae",
160-
torch_dtype=torch.float32,
161-
device_map="cuda")
156+
vae = AutoModel.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
162157

163158
pipeline = SkyReelsV2DiffusionForcingPipeline.from_pretrained(
164159
model_id,
165160
vae=vae,
166161
torch_dtype=torch.bfloat16,
167-
device_map="cuda"
168162
)
163+
pipeline.to("cuda")
169164
flow_shift = 8.0 # 8.0 for T2V, 5.0 for I2V
170165
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
171166

172167
# Some acceleration helpers
173168
# Be sure to install Flash Attention: https://github.com/Dao-AILab/flash-attention#installation-and-features
174-
# Normally 14 min., with compile_repeated_blocks(fullgraph=True) 12 min., with Flash Attention too 5.5 min at A100.
175-
#pipeline.transformer.set_attention_backend("flash")
169+
# Normally 14 min., with compile_repeated_blocks(fullgraph=True) 12 min., and with Flash Attention even less time on an A100.
170+
# If you want to follow the original implementation's choice of attention backends:
171+
#for block in pipeline.transformer.blocks:
172+
# block.attn1.set_attention_backend("_native_cudnn")
173+
# block.attn2.set_attention_backend("flash_varlen") # or "_flash_varlen_3"
176174
#pipeline.transformer.compile_repeated_blocks(fullgraph=True)
177175

178176
prompt = "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through the window."
@@ -211,10 +209,11 @@ from diffusers.utils import export_to_video, load_image
211209

212210

213211
model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
214-
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32, device_map="cuda")
212+
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
215213
pipeline = SkyReelsV2DiffusionForcingImageToVideoPipeline.from_pretrained(
216-
model_id, vae=vae, torch_dtype=torch.bfloat16, device_map="cuda"
214+
model_id, vae=vae, torch_dtype=torch.bfloat16
217215
)
216+
pipeline.to("cuda")
218217
flow_shift = 5.0 # 8.0 for T2V, 5.0 for I2V
219218
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
220219

@@ -273,10 +272,11 @@ from diffusers.utils import export_to_video, load_video
273272

274273

275274
model_id = "Skywork/SkyReels-V2-DF-1.3B-720P-Diffusers"
276-
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32, device_map="cuda")
275+
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
277276
pipeline = SkyReelsV2DiffusionForcingVideoToVideoPipeline.from_pretrained(
278-
model_id, vae=vae, torch_dtype=torch.bfloat16, device_map="cuda"
277+
model_id, vae=vae, torch_dtype=torch.bfloat16
279278
)
279+
pipeline.to("cuda")
280280
flow_shift = 5.0 # 8.0 for T2V, 5.0 for I2V
281281
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config, flow_shift=flow_shift)
282282

0 commit comments

Comments
 (0)