huggingface
diff --git a/‎examples/server/requirements.txt‎
Lines changed: 9 additions & 6 deletions b/‎examples/server/requirements.txt‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎src/diffusers/configuration_utils.py‎
Lines changed: 4 additions & 1 deletion b/‎src/diffusers/configuration_utils.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/diffusers/loaders/single_file_model.py‎
Lines changed: 1 addition & 4 deletions b/‎src/diffusers/loaders/single_file_model.py‎
Lines changed: 1 addition & 4 deletions
diff --git a/‎src/diffusers/loaders/single_file_utils.py‎
Lines changed: 1 addition & 7 deletions b/‎src/diffusers/loaders/single_file_utils.py‎
Lines changed: 1 addition & 7 deletions
diff --git a/‎src/diffusers/loaders/transformer_flux.py‎
Lines changed: 1 addition & 3 deletions b/‎src/diffusers/loaders/transformer_flux.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎src/diffusers/loaders/transformer_sd3.py‎
Lines changed: 1 addition & 3 deletions b/‎src/diffusers/loaders/transformer_sd3.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎src/diffusers/loaders/unet.py‎
Lines changed: 1 addition & 3 deletions b/‎src/diffusers/loaders/unet.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎src/diffusers/models/modeling_utils.py‎
Lines changed: 7 additions & 5 deletions b/‎src/diffusers/models/modeling_utils.py‎
Lines changed: 7 additions & 5 deletions
@@ -1,10 +1,10 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile requirements.in -o requirements.txt
-aiohappyeyeballs==2.4.3
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.10.10
+aiohttp==3.12.14
     # via -r requirements.in
-aiosignal==1.3.1
+aiosignal==1.4.0
     # via aiohttp
 annotated-types==0.7.0
     # via pydantic
@@ -29,7 +29,6 @@ filelock==3.16.1
     #   huggingface-hub
     #   torch
     #   transformers
-    #   triton
 frozenlist==1.5.0
     # via
     #   aiohttp
@@ -111,7 +110,9 @@ prometheus-client==0.21.0
 prometheus-fastapi-instrumentator==7.0.0
     # via -r requirements.in
 propcache==0.2.0
-    # via yarl
+    # via
+    #   aiohttp
+    #   yarl
 py-consul==1.5.3
     # via -r requirements.in
 pydantic==2.9.2
@@ -155,7 +156,9 @@ triton==3.3.0
     # via torch
 typing-extensions==4.12.2
     # via
+    #   aiosignal
     #   anyio
+    #   exceptiongroup
     #   fastapi
     #   huggingface-hub
     #   multidict
@@ -168,5 +171,5 @@ urllib3==2.5.0
     # via requests
 uvicorn==0.32.0
     # via -r requirements.in
-yarl==1.16.0
+yarl==1.18.3
     # via aiohttp
@@ -763,4 +763,7 @@ def from_config(cls, config: Union[FrozenDict, Dict[str, Any]] = None, return_un
         # resolve remapping
         remapped_class = _fetch_remapped_cls_from_config(config, cls)
 
-        return remapped_class.from_config(config, return_unused_kwargs, **kwargs)
+        if remapped_class is cls:
+            return super(LegacyConfigMixin, remapped_class).from_config(config, return_unused_kwargs, **kwargs)
+        else:
+            return remapped_class.from_config(config, return_unused_kwargs, **kwargs)
@@ -24,7 +24,7 @@
 from .. import __version__
 from ..quantizers import DiffusersAutoQuantizer
 from ..utils import deprecate, is_accelerate_available, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .single_file_utils import (
     SingleFileComponentError,
     convert_animatediff_checkpoint_to_diffusers,
@@ -431,10 +431,7 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
                 keep_in_fp32_modules=keep_in_fp32_modules,
                 unexpected_keys=unexpected_keys,
             )
-            # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-            # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
             empty_device_cache()
-            device_synchronize()
         else:
             _, unexpected_keys = model.load_state_dict(diffusers_format_checkpoint, strict=False)
 
 
@@ -46,7 +46,7 @@
 )
 from ..utils.constants import DIFFUSERS_REQUEST_TIMEOUT
 from ..utils.hub_utils import _get_model_file
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 
 
 if is_transformers_available():
@@ -1690,10 +1690,7 @@ def create_diffusers_clip_model_from_ldm(
 
     if is_accelerate_available():
         load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype)
-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()
     else:
         model.load_state_dict(diffusers_format_checkpoint, strict=False)
 
@@ -2153,10 +2150,7 @@ def create_diffusers_t5_model_from_checkpoint(
 
     if is_accelerate_available():
         load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype)
-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()
     else:
         model.load_state_dict(diffusers_format_checkpoint)
 
 
@@ -19,7 +19,7 @@
 )
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import is_accelerate_available, is_torch_version, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 
 
 if is_accelerate_available():
@@ -82,7 +82,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_us
             device_map = {"": self.device}
             load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)
             empty_device_cache()
-            device_synchronize()
 
         return image_projection
 
@@ -158,7 +157,6 @@ def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_
                 key_id += 1
 
         empty_device_cache()
-        device_synchronize()
 
         return attn_procs
 
 
@@ -18,7 +18,7 @@
 from ..models.embeddings import IPAdapterTimeImageProjection
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import is_accelerate_available, is_torch_version, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 
 
 logger = logging.get_logger(__name__)
@@ -82,7 +82,6 @@ def _convert_ip_adapter_attn_to_diffusers(
                 )
 
         empty_device_cache()
-        device_synchronize()
 
         return attn_procs
 
@@ -152,7 +151,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(
             device_map = {"": self.device}
             load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype)
             empty_device_cache()
-            device_synchronize()
 
         return image_proj
 
 
@@ -43,7 +43,7 @@
     is_torch_version,
     logging,
 )
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .lora_base import _func_optionally_disable_offloading
 from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
 from .utils import AttnProcsLayers
@@ -755,7 +755,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_us
             device_map = {"": self.device}
             load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)
             empty_device_cache()
-            device_synchronize()
 
         return image_projection
 
@@ -854,7 +853,6 @@ def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_
                 key_id += 2
 
         empty_device_cache()
-        device_synchronize()
 
         return attn_procs
 
 
@@ -62,7 +62,7 @@
     load_or_create_model_card,
     populate_model_card,
 )
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .model_loading_utils import (
     _caching_allocator_warmup,
     _determine_device_map,
@@ -1540,10 +1540,7 @@ def _load_pretrained_model(
                     assign_to_params_buffers = check_support_param_buffer_assignment(model, state_dict)
                 error_msgs += _load_state_dict_into_model(model, state_dict, assign_to_params_buffers)
 
-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()
 
         if offload_index is not None and len(offload_index) > 0:
             save_offload_index(offload_index, offload_folder)
@@ -1880,4 +1877,9 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
         # resolve remapping
         remapped_class = _fetch_remapped_cls_from_config(config, cls)
 
-        return remapped_class.from_pretrained(pretrained_model_name_or_path, **kwargs_copy)
+        if remapped_class is cls:
+            return super(LegacyModelMixin, remapped_class).from_pretrained(
+                pretrained_model_name_or_path, **kwargs_copy
+            )
+        else:
+            return remapped_class.from_pretrained(pretrained_model_name_or_path, **kwargs_copy)