From 721375bb8197cce97eca770877e9c30367fd6d36 Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 13 May 2025 14:48:20 +0530 Subject: [PATCH 1/7] update --- .../api/models/hidream_image_transformer.md | 23 +++++++++++++++++++ src/diffusers/loaders/single_file_model.py | 5 ++++ src/diffusers/loaders/single_file_utils.py | 13 +++++++++++ 3 files changed, 41 insertions(+) diff --git a/docs/source/en/api/models/hidream_image_transformer.md b/docs/source/en/api/models/hidream_image_transformer.md index 4218e7f56bec..3c84a2afad4f 100644 --- a/docs/source/en/api/models/hidream_image_transformer.md +++ b/docs/source/en/api/models/hidream_image_transformer.md @@ -21,6 +21,29 @@ from diffusers import HiDreamImageTransformer2DModel transformer = HiDreamImageTransformer2DModel.from_pretrained("HiDream-ai/HiDream-I1-Full", subfolder="transformer", torch_dtype=torch.bfloat16) ``` +## Loading GGUF quantized checkpoints + +GGUF checkpoints for the `HiDreamImageTransformer2DModel` can we be loaded using `~FromOriginalModelMixin.from_single_file` + +```python +from diffusers import HiDreamImageTransformer2DModel + +ckpt_path = "https://huggingface.co/city96/HiDream-I1-Dev-gguf/blob/main/hidream-i1-dev-Q2_K.gguf" +transformer = HiDreamImageTransformer2DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16) +``` + +If you are trying to use a GGUF checkpoint for the `HiDream-ai/HiDream-E1-Full` model, you will have to pass in a `config` argument to properly configure the model. This is because the HiDream I1 and E1 models share the same state dict keys, so it is currently not possible to automatically infer the model type from the checkpoint itself. + +```python +from diffusers import HiDreamImageTransformer2DModel + +ckpt_path = "https://huggingface.co/ND911/HiDream_e1_full_bf16-ggufs/blob/main/hidream_e1_full_bf16-Q2_K.gguf" + +transformer = HiDreamImageTransformer2DModel.from_single_file(ckpt_path, config="HiDream-ai/HiDream-E1-Full", subfolder="transformer", torch_dtype=torch.bfloat16) +``` + + + ## HiDreamImageTransformer2DModel [[autodoc]] HiDreamImageTransformer2DModel diff --git a/src/diffusers/loaders/single_file_model.py b/src/diffusers/loaders/single_file_model.py index a2f27b765a1b..8da4bc498239 100644 --- a/src/diffusers/loaders/single_file_model.py +++ b/src/diffusers/loaders/single_file_model.py @@ -31,6 +31,7 @@ convert_autoencoder_dc_checkpoint_to_diffusers, convert_controlnet_checkpoint, convert_flux_transformer_checkpoint_to_diffusers, + convert_hidream_transformer_to_diffusers, convert_hunyuan_video_transformer_to_diffusers, convert_ldm_unet_checkpoint, convert_ldm_vae_checkpoint, @@ -133,6 +134,10 @@ "checkpoint_mapping_fn": convert_wan_vae_to_diffusers, "default_subfolder": "vae", }, + "HiDreamImageTransformer2DModel": { + "checkpoint_mapping_fn": convert_hidream_transformer_to_diffusers, + "default_subfolder": "transformer", + }, } diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 3a2855df2d7d..5cdc3819188a 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -126,6 +126,7 @@ ], "wan": ["model.diffusion_model.head.modulation", "head.modulation"], "wan_vae": "decoder.middle.0.residual.0.gamma", + "hidream": "double_stream_blocks.0.block.adaLN_modulation.1.bias", } DIFFUSERS_DEFAULT_PIPELINE_PATHS = { @@ -190,6 +191,7 @@ "wan-t2v-1.3B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"}, "wan-t2v-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-T2V-14B-Diffusers"}, "wan-i2v-14B": {"pretrained_model_name_or_path": "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"}, + "hidream": {"pretrained_model_name_or_path": "HiDream-ai/HiDream-I1-Dev"}, } # Use to configure model sample size when original config is provided @@ -701,6 +703,8 @@ def infer_diffusers_model_type(checkpoint): elif CHECKPOINT_KEY_NAMES["wan_vae"] in checkpoint: # All Wan models use the same VAE so we can use the same default model repo to fetch the config model_type = "wan-t2v-14B" + elif CHECKPOINT_KEY_NAMES["hidream"] in checkpoint: + model_type = "hidream" else: model_type = "v1" @@ -3293,3 +3297,12 @@ def convert_wan_vae_to_diffusers(checkpoint, **kwargs): converted_state_dict[key] = value return converted_state_dict + + +def convert_hidream_transformer_to_diffusers(checkpoint, **kwargs): + keys = list(checkpoint.keys()) + for k in keys: + if "model.diffusion_model." in k: + checkpoint[k.replace("model.diffusion_model.", "")] = checkpoint.pop(k) + + return checkpoint From 2a19302e6522ff2106933843bd0d88d82c3ea5a8 Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 13 May 2025 14:50:13 +0530 Subject: [PATCH 2/7] update --- .../api/models/hidream_image_transformer.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/source/en/api/models/hidream_image_transformer.md b/docs/source/en/api/models/hidream_image_transformer.md index 3c84a2afad4f..b7af6f4c89e2 100644 --- a/docs/source/en/api/models/hidream_image_transformer.md +++ b/docs/source/en/api/models/hidream_image_transformer.md @@ -26,24 +26,33 @@ transformer = HiDreamImageTransformer2DModel.from_pretrained("HiDream-ai/HiDream GGUF checkpoints for the `HiDreamImageTransformer2DModel` can we be loaded using `~FromOriginalModelMixin.from_single_file` ```python -from diffusers import HiDreamImageTransformer2DModel +from diffusers import GGUFQuantizationConfig, HiDreamImageTransformer2DModel ckpt_path = "https://huggingface.co/city96/HiDream-I1-Dev-gguf/blob/main/hidream-i1-dev-Q2_K.gguf" -transformer = HiDreamImageTransformer2DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16) +transformer = HiDreamImageTransformer2DModel.from_single_file( + ckpt_path, + quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), + torch_dtype=torch.bfloat16 +) ``` If you are trying to use a GGUF checkpoint for the `HiDream-ai/HiDream-E1-Full` model, you will have to pass in a `config` argument to properly configure the model. This is because the HiDream I1 and E1 models share the same state dict keys, so it is currently not possible to automatically infer the model type from the checkpoint itself. ```python -from diffusers import HiDreamImageTransformer2DModel +from diffusers import GGUFQuantizationConfig, HiDreamImageTransformer2DModel ckpt_path = "https://huggingface.co/ND911/HiDream_e1_full_bf16-ggufs/blob/main/hidream_e1_full_bf16-Q2_K.gguf" -transformer = HiDreamImageTransformer2DModel.from_single_file(ckpt_path, config="HiDream-ai/HiDream-E1-Full", subfolder="transformer", torch_dtype=torch.bfloat16) +transformer = HiDreamImageTransformer2DModel.from_single_file( + ckpt_path, + quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), + config="HiDream-ai/HiDream-E1-Full", + subfolder="transformer", + torch_dtype=torch.bfloat16 +) ``` - ## HiDreamImageTransformer2DModel [[autodoc]] HiDreamImageTransformer2DModel From 4436f54a8053d4386ebf8a8f36115e3dee9c6908 Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 13 May 2025 14:56:12 +0530 Subject: [PATCH 3/7] update --- .../models/transformers/transformer_hidream_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/models/transformers/transformer_hidream_image.py b/src/diffusers/models/transformers/transformer_hidream_image.py index 06f47fcbaf40..77902dcf5852 100644 --- a/src/diffusers/models/transformers/transformer_hidream_image.py +++ b/src/diffusers/models/transformers/transformer_hidream_image.py @@ -5,7 +5,7 @@ import torch.nn.functional as F from ...configuration_utils import ConfigMixin, register_to_config -from ...loaders import PeftAdapterMixin +from ...loaders import FromOriginalModelMixin, PeftAdapterMixin from ...models.modeling_outputs import Transformer2DModelOutput from ...models.modeling_utils import ModelMixin from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers @@ -602,7 +602,7 @@ def forward( ) -class HiDreamImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin): +class HiDreamImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin): _supports_gradient_checkpointing = True _no_split_modules = ["HiDreamImageTransformerBlock", "HiDreamImageSingleTransformerBlock"] From 971794454f863142a0adea9d57947e8321e0dd0f Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 13 May 2025 15:44:14 +0530 Subject: [PATCH 4/7] update --- docs/source/en/api/models/hidream_image_transformer.md | 6 ++++-- tests/quantization/gguf/test_gguf.py | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/source/en/api/models/hidream_image_transformer.md b/docs/source/en/api/models/hidream_image_transformer.md index b7af6f4c89e2..238d3007bb35 100644 --- a/docs/source/en/api/models/hidream_image_transformer.md +++ b/docs/source/en/api/models/hidream_image_transformer.md @@ -21,11 +21,12 @@ from diffusers import HiDreamImageTransformer2DModel transformer = HiDreamImageTransformer2DModel.from_pretrained("HiDream-ai/HiDream-I1-Full", subfolder="transformer", torch_dtype=torch.bfloat16) ``` -## Loading GGUF quantized checkpoints +## Loading GGUF quantized checkpoints for HiDream-I1 -GGUF checkpoints for the `HiDreamImageTransformer2DModel` can we be loaded using `~FromOriginalModelMixin.from_single_file` +GGUF checkpoints for the `HiDreamImageTransformer2DModel` can be loaded using `~FromOriginalModelMixin.from_single_file` ```python +import torch from diffusers import GGUFQuantizationConfig, HiDreamImageTransformer2DModel ckpt_path = "https://huggingface.co/city96/HiDream-I1-Dev-gguf/blob/main/hidream-i1-dev-Q2_K.gguf" @@ -39,6 +40,7 @@ transformer = HiDreamImageTransformer2DModel.from_single_file( If you are trying to use a GGUF checkpoint for the `HiDream-ai/HiDream-E1-Full` model, you will have to pass in a `config` argument to properly configure the model. This is because the HiDream I1 and E1 models share the same state dict keys, so it is currently not possible to automatically infer the model type from the checkpoint itself. ```python +import torch from diffusers import GGUFQuantizationConfig, HiDreamImageTransformer2DModel ckpt_path = "https://huggingface.co/ND911/HiDream_e1_full_bf16-ggufs/blob/main/hidream_e1_full_bf16-Q2_K.gguf" diff --git a/tests/quantization/gguf/test_gguf.py b/tests/quantization/gguf/test_gguf.py index 9f54ecf6c67c..2cab220aa025 100644 --- a/tests/quantization/gguf/test_gguf.py +++ b/tests/quantization/gguf/test_gguf.py @@ -12,6 +12,7 @@ FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig, + HiDreamImageTransformer2DModel, SD3Transformer2DModel, StableDiffusion3Pipeline, ) @@ -549,3 +550,10 @@ def test_lora_loading(self): max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) self.assertTrue(max_diff < 1e-3) + + +class HiDreamGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): + ckpt_path = "https://huggingface.co/city96/HiDream-I1-Dev-gguf/blob/main/hidream-i1-dev-Q2_K.gguf" + torch_dtype = torch.bfloat16 + model_cls = HiDreamImageTransformer2DModel + expected_memory_use_in_gb = 8 From 406a656d9a2922aa651ab1630d44b1044972b81a Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Tue, 13 May 2025 14:10:56 +0200 Subject: [PATCH 5/7] update --- .../hidream_image/pipeline_hidream_image.py | 7 ++++--- tests/quantization/gguf/test_gguf.py | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py index 6fe74cbd9acc..c1f4d7cc532c 100644 --- a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py +++ b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py @@ -36,11 +36,11 @@ Examples: ```py >>> import torch - >>> from transformers import PreTrainedTokenizerFast, LlamaForCausalLM - >>> from diffusers import UniPCMultistepScheduler, HiDreamImagePipeline + >>> from transformers import AutoTokenizer, LlamaForCausalLM + >>> from diffusers import HiDreamImagePipeline - >>> tokenizer_4 = PreTrainedTokenizerFast.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct") + >>> tokenizer_4 = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct") >>> text_encoder_4 = LlamaForCausalLM.from_pretrained( ... "meta-llama/Meta-Llama-3.1-8B-Instruct", ... output_hidden_states=True, @@ -901,6 +901,7 @@ def __call__( pooled_prompt_embeds=pooled_prompt_embeds, negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, device=device, + dtype=self.dtype, num_images_per_prompt=num_images_per_prompt, max_sequence_length=max_sequence_length, lora_scale=lora_scale, diff --git a/tests/quantization/gguf/test_gguf.py b/tests/quantization/gguf/test_gguf.py index 2cab220aa025..db8688a09c09 100644 --- a/tests/quantization/gguf/test_gguf.py +++ b/tests/quantization/gguf/test_gguf.py @@ -557,3 +557,23 @@ class HiDreamGGUFSingleFileTests(GGUFSingleFileTesterMixin, unittest.TestCase): torch_dtype = torch.bfloat16 model_cls = HiDreamImageTransformer2DModel expected_memory_use_in_gb = 8 + + def get_dummy_inputs(self): + return { + "hidden_states": torch.randn((1, 16, 128, 128), generator=torch.Generator("cpu").manual_seed(0)).to( + torch_device, self.torch_dtype + ), + "encoder_hidden_states_t5": torch.randn( + (1, 128, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "encoder_hidden_states_llama3": torch.randn( + (32, 1, 128, 4096), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "pooled_embeds": torch.randn( + (1, 2048), + generator=torch.Generator("cpu").manual_seed(0), + ).to(torch_device, self.torch_dtype), + "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + } From 5de8cb7ddb3b72c78eb7e3308cd5251cafe13a7a Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Tue, 13 May 2025 15:51:09 +0200 Subject: [PATCH 6/7] update --- src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py | 1 - tests/quantization/gguf/test_gguf.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py index c1f4d7cc532c..17bf0a3fe8c6 100644 --- a/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py +++ b/src/diffusers/pipelines/hidream_image/pipeline_hidream_image.py @@ -901,7 +901,6 @@ def __call__( pooled_prompt_embeds=pooled_prompt_embeds, negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, device=device, - dtype=self.dtype, num_images_per_prompt=num_images_per_prompt, max_sequence_length=max_sequence_length, lora_scale=lora_scale, diff --git a/tests/quantization/gguf/test_gguf.py b/tests/quantization/gguf/test_gguf.py index db8688a09c09..ae3900459de2 100644 --- a/tests/quantization/gguf/test_gguf.py +++ b/tests/quantization/gguf/test_gguf.py @@ -575,5 +575,5 @@ def get_dummy_inputs(self): (1, 2048), generator=torch.Generator("cpu").manual_seed(0), ).to(torch_device, self.torch_dtype), - "timestep": torch.tensor([1]).to(torch_device, self.torch_dtype), + "timesteps": torch.tensor([1]).to(torch_device, self.torch_dtype), } From 4e2c34699201676168d4b6128d30c31e6239b48a Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Tue, 13 May 2025 16:08:01 +0200 Subject: [PATCH 7/7] update --- .../en/api/models/hidream_image_transformer.md | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/docs/source/en/api/models/hidream_image_transformer.md b/docs/source/en/api/models/hidream_image_transformer.md index 238d3007bb35..5dbf40b5a14c 100644 --- a/docs/source/en/api/models/hidream_image_transformer.md +++ b/docs/source/en/api/models/hidream_image_transformer.md @@ -37,24 +37,6 @@ transformer = HiDreamImageTransformer2DModel.from_single_file( ) ``` -If you are trying to use a GGUF checkpoint for the `HiDream-ai/HiDream-E1-Full` model, you will have to pass in a `config` argument to properly configure the model. This is because the HiDream I1 and E1 models share the same state dict keys, so it is currently not possible to automatically infer the model type from the checkpoint itself. - -```python -import torch -from diffusers import GGUFQuantizationConfig, HiDreamImageTransformer2DModel - -ckpt_path = "https://huggingface.co/ND911/HiDream_e1_full_bf16-ggufs/blob/main/hidream_e1_full_bf16-Q2_K.gguf" - -transformer = HiDreamImageTransformer2DModel.from_single_file( - ckpt_path, - quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16), - config="HiDream-ai/HiDream-E1-Full", - subfolder="transformer", - torch_dtype=torch.bfloat16 -) -``` - - ## HiDreamImageTransformer2DModel [[autodoc]] HiDreamImageTransformer2DModel