- 
                Notifications
    You must be signed in to change notification settings 
- Fork 6.5k
Closed
Closed
Copy link
Description
I want to save the HiDream model in int4 (only text encoder 3 and the transformer, one at a time).
This code saves the other three components (TE1, TE2 and the VAE) but does not save text_encoder_3, even after waiting for 30 minutes. If I do not supply TE1, TE2, TE4 and the VAE, it throws an error.
Any suggestions, please?
import torch
from transformers import PreTrainedTokenizerFast, LlamaForCausalLM, T5EncoderModel
from optimum.quanto import freeze, qint4, quantize, quantization_map
from diffusers import (
    UniPCMultistepScheduler,
    HiDreamImagePipeline,
    HiDreamImageTransformer2DModel,
)
# Script: load HiDream-I1-Full, swap in int4-quantized copies of text_encoder_3
# and text_encoder_4, drop the transformer, and write the pipeline to disk.

# Hub repository the pipeline is pulled from, and the local output folder.
base_repo = "HiDream-ai/HiDream-I1-Full"
output_dir = "HiDream-I1-Full-int4"

# The same Llama checkpoint provides both tokenizer_4 and text_encoder_4.
_llama_repo = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Weight-loading options shared by every model/pipeline load below.
_load_kwargs = {"torch_dtype": torch.bfloat16, "low_cpu_mem_usage": True}

tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(_llama_repo)

# text_encoder_4: load in bf16, then quantize(weights=qint4) and freeze it.
text_encoder_4 = LlamaForCausalLM.from_pretrained(
    _llama_repo,
    output_hidden_states=True,
    output_attentions=True,
    **_load_kwargs,
)
quantize(text_encoder_4, weights=qint4)
freeze(text_encoder_4)

# text_encoder_3: same treatment, loaded from the pipeline repo's subfolder.
text_encoder_3 = T5EncoderModel.from_pretrained(
    base_repo,
    subfolder="text_encoder_3",
    **_load_kwargs,
)
quantize(text_encoder_3, weights=qint4)
freeze(text_encoder_3)

# Scheduler that replaces whichever one the pipeline loads by default.
scheduler = UniPCMultistepScheduler(
    flow_shift=3.0,
    prediction_type="flow_prediction",
    use_flow_sigmas=True,
)

# Build the pipeline. The two quantized encoders and tokenizer_4 are passed in
# explicitly, transformer=None is passed explicitly, and the remaining
# components come from base_repo.
pipe = HiDreamImagePipeline.from_pretrained(
    base_repo,
    text_encoder_3=text_encoder_3,
    tokenizer_4=tokenizer_4,
    text_encoder_4=text_encoder_4,
    transformer=None,
    **_load_kwargs,
)
pipe.scheduler = scheduler

# Write the pipeline to output_dir.
# NOTE(review): `quantization_map` is imported at the top of the file but never
# used here; confirm whether save_pretrained alone is the intended way to
# persist quanto-quantized weights.
pipe.save_pretrained(output_dir)
print(f"Model successfully saved to {output_dir} with quantized text_encoder_3")

# Reload check is currently disabled: the message is printed, but the load
# below stays commented out.
print("Verifying saved model...")
# loaded_pipe = HiDreamImagePipeline.from_pretrained(output_dir, torch_dtype=torch.bfloat16)
print("Model loaded successfully!")
It just stays there:
(sddw-dev) C:\aiOWN\diffuser_webui>python HiDream-I1-Full-int4_SAVE.py
C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\transformers\generation\configuration_utils.py:817: UserWarning: `return_dict_in_generate` is NOT set to `True`, but `output_attentions` is. When `return_dict_in_generate` is not `True`, `output_attentions` is ignored.
  warnings.warn(
C:\Users\nitin\miniconda3\envs\sddw-dev\Lib\site-packages\transformers\generation\configuration_utils.py:817: UserWarning: `return_dict_in_generate` is NOT set to `True`, but `output_hidden_states` is. When `return_dict_in_generate` is not `True`, `output_hidden_states` is ignored.
  warnings.warn(
Loading checkpoint shards: 100%|████████████████████████████████████| 4/4 [00:00<00:00, 18.41it/s]
Loading checkpoint shards: 100%|████████████████████████████████████| 2/2 [00:02<00:00,  1.39s/it]
Loading pipeline components...: 100%|█████████████████████████████| 10/10 [00:07<00:00,  1.34it/s]
Metadata
Metadata
Assignees
Labels
No labels

