Conversation

@yiyixuxu (Collaborator)

import torch
from diffusers import StableDiffusion3Pipeline
import time

repo = "stabilityai/stable-diffusion-3-medium-diffusers"
device = "cuda:3"
dtype = torch.float16

def precompute_sd3_prompt_embeds(prompt: str, offload_seq: str = None):
    # Load only the text encoders/tokenizers; skip the transformer, vae and scheduler.
    pipe = StableDiffusion3Pipeline.from_pretrained(
        repo,
        transformer=None,
        vae=None,
        scheduler=None,
        torch_dtype=dtype)
    if offload_seq is None:
        pipe = pipe.to(device)
    elif offload_seq == "default":
        pipe.enable_model_cpu_offload(device=device)
    else:
        # Custom per-pipeline offload order (the model_cpu_offload_seq argument added in this PR).
        pipe.enable_model_cpu_offload(model_cpu_offload_seq=offload_seq, device=device)

    with torch.no_grad():
        prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = pipe.encode_prompt(
            prompt=prompt,
            prompt_2=None,
            prompt_3=None,
            max_sequence_length=512,
        )
    
    return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

def test_sd3(prompt, offload_seq=None):
    print(f"\nTesting with offload_seq: {offload_seq}")
    # The per-component device placement shown in the output below is printed from
    # inside encode_prompt; here we only need to trigger the encoding.
    prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = precompute_sd3_prompt_embeds(prompt, offload_seq)

# Test different offload sequences
prompt = "A cat holding a sign that says hello world"
offload_sequences = [
    None,
    "default",
    "text_encoder_2->text_encoder_3->transformer->vae",
    "text_encoder_1->text_encoder_3->transformer->vae"
]
print_message = [
    f"no offloading, device: {device}",
    "default offloading for sd3: text_encoder->text_encoder_2->text_encoder_3->transformer->vae",
    "text_encoder_2->text_encoder_3->transformer->vae",
    "text_encoder->text_encoder_3->transformer->vae",
]

for seq, msg in zip(offload_sequences, print_message):
    try:
        print(f" testing: {msg}")
        test_sd3(prompt, seq)
    except Exception as e:
        print(f"\nError with sequence {seq}:")
        print(str(e))
    finally:
        torch.cuda.empty_cache()

Output:

 testing: no offloading, device: cuda:3

Testing with offload_seq: None
Loading pipeline components...:  17%|████████████████▎                                                                                 | 1/6 [00:00<00:01,  2.72it/s]
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  3.95it/s]
Loading pipeline components...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.55it/s]
 
 after get_clip_prompt_embeds(1):
 text_encoder: cuda:3
 text_encoder_2: cuda:3
 text_encoder_3: cuda:3
 transformer: None
 vae: None
 
 after get_clip_prompt_embeds(2):
 text_encoder: cuda:3
 text_encoder_2: cuda:3
 text_encoder_3: cuda:3
 transformer: None
 vae: None
 
 after get_t5_prompt_embeds:
 text_encoder: cuda:3
 text_encoder_2: cuda:3
 text_encoder_3: cuda:3
 transformer: None
 vae: None

 testing: default offloading for sd3: text_encoder->text_encoder_2->text_encoder_3->transformer->vae

Testing with offload_seq: default
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.09it/s]
Loading pipeline components...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  3.94it/s]
 
 after get_clip_prompt_embeds(1):
 text_encoder: cuda:3
 text_encoder_2: cpu
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_clip_prompt_embeds(2):
 text_encoder: cpu
 text_encoder_2: cuda:3
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_t5_prompt_embeds:
 text_encoder: cpu
 text_encoder_2: cpu
 text_encoder_3: cuda:3
 transformer: None
 vae: None


 testing: text_encoder_2->text_encoder_3->transformer->vae

Testing with offload_seq: text_encoder_2->text_encoder_3->transformer->vae
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.31it/s]
Loading pipeline components...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  4.02it/s]
 
 after get_clip_prompt_embeds(1):
 text_encoder: cuda:3
 text_encoder_2: cpu
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_clip_prompt_embeds(2):
 text_encoder: cuda:3
 text_encoder_2: cuda:3
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_t5_prompt_embeds:
 text_encoder: cuda:3
 text_encoder_2: cpu
 text_encoder_3: cuda:3
 transformer: None
 vae: None


 testing: text_encoder->text_encoder_3->transformer->vae

Testing with offload_seq: text_encoder->text_encoder_3->transformer->vae
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  4.50it/s]
Loading pipeline components...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:01<00:00,  4.08it/s]
 
 after get_clip_prompt_embeds(1):
 text_encoder: cuda:3
 text_encoder_2: cpu
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_clip_prompt_embeds(2):
 text_encoder: cuda:3
 text_encoder_2: cuda:3
 text_encoder_3: cpu
 transformer: None
 vae: None
 
 after get_t5_prompt_embeds:
 text_encoder: cpu
 text_encoder_2: cuda:3
 text_encoder_3: cuda:3
 transformer: None
 vae: None
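
As a minimal sketch (not part of the test above) of how the precomputed embeddings could then be consumed: encode the prompt first, reload the pipeline without any text encoders or tokenizers, and pass the embeddings directly to the call. This assumes the standard StableDiffusion3Pipeline prompt_embeds/pooled_prompt_embeds arguments and reuses repo, dtype, device and precompute_sd3_prompt_embeds from the script above.

# Hypothetical follow-up, not part of the test script above.
# 1. Precompute the prompt embeddings with the text-encoder-only pipeline.
prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = precompute_sd3_prompt_embeds(prompt, offload_seq="default")

# 2. Reload the pipeline without text encoders/tokenizers and run denoising with the embeddings.
pipe = StableDiffusion3Pipeline.from_pretrained(
    repo,
    text_encoder=None,
    text_encoder_2=None,
    text_encoder_3=None,
    tokenizer=None,
    tokenizer_2=None,
    tokenizer_3=None,
    torch_dtype=dtype,
).to(device)

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
    pooled_prompt_embeds=pooled_prompt_embeds,
    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
    num_inference_steps=28,
    guidance_scale=7.0,
).images[0]
image.save("sd3_precomputed_embeds.png")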

@HuggingFaceDocBuilderDev

The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.

@github-actions (Contributor)

This issue has been automatically marked as stale because it has not had recent activity. If you think this still needs to be addressed please comment on this thread.

Please note that issues that do not follow the contributing guidelines are likely to be ignored.

@github-actions bot added the stale (Issues that haven't received updates) label on Jan 13, 2025
@yiyixuxu closed this on Jan 13, 2025
@yiyixuxu deleted the dynamic-offload branch on Jan 13, 2025 at 19:19