Teriks
diff --git a/‎dgenerate/console/schemas/submodels.json
Lines changed: 1 addition & 1 deletion b/‎dgenerate/console/schemas/submodels.json
Lines changed: 1 addition & 1 deletion
diff --git a/‎dgenerate/pipelinewrapper/pipelines.py
Lines changed: 58 additions & 4 deletions b/‎dgenerate/pipelinewrapper/pipelines.py
Lines changed: 58 additions & 4 deletions
diff --git a/‎dgenerate/pipelinewrapper/uris/controlneturi.py
Lines changed: 37 additions & 4 deletions b/‎dgenerate/pipelinewrapper/uris/controlneturi.py
Lines changed: 37 additions & 4 deletions
diff --git a/‎docs/manual.rst
Lines changed: 64 additions & 3 deletions b/‎docs/manual.rst
Lines changed: 64 additions & 3 deletions
@@ -34,7 +34,7 @@
 import diffusers.loaders
 import diffusers.loaders.single_file_utils
 import diffusers.quantizers.quantization_config
-import torch.nn
+import torch
 import torch.nn
 import transformers
 
@@ -1226,7 +1226,8 @@ def create_diffusion_pipeline(
     :param quantizer_uri: Optional ``--quantizer`` URI value
     :param quantizer_map: Collection of pipeline submodule names to which quantization should be applied when
         ``quantizer_uri`` is provided. Valid values include: ``unet``, ``transformer``, ``text_encoder``,
-        ``text_encoder_2``, ``text_encoder_3``. If ``None``, all supported modules will be quantized.
+        ``text_encoder_2``, ``text_encoder_3``, and ``controlnet``. If ``None``, all supported modules will be quantized,
+        except for ``controlnet``.
     :param pag: Use perturbed attention guidance?
     :param safety_checker: Safety checker enabled? default is ``False``
     :param original_config: Optional original training config .yaml file path when loading a single file checkpoint.
@@ -2217,6 +2218,17 @@ def get_device_map_for_quantizer(quantizer_uri):
             if quantizer_class is _uris.SDNQQuantizerUri:
                 sdnq_cast_hack = True
 
+    # Check controlnet URIs
+    if controlnet_uris:
+        for controlnet_uri in controlnet_uris:
+            parsed_uri = _uris.ControlNetUri.parse(controlnet_uri, model_type=model_type)
+            uri_quant_check.append(parsed_uri)
+            if parsed_uri.quantizer:
+                manual_quantizer_components.add('controlnet')
+                quantizer_class = _uris.get_quantizer_uri_class(parsed_uri.quantizer)
+                if quantizer_class is _uris.SDNQQuantizerUri:
+                    sdnq_cast_hack = True
+
     if quantizer_uri or any(p.quantizer for p in uri_quant_check):
         # for now, just knock out anything cached on the gpu, such as the last pipeline
         # the quantized pipeline modules are likely going to go straight onto the GPU
@@ -2367,6 +2379,20 @@ def sdnq_forward(og_forward, model, *args, **kwargs):
                 kwargs[k] = v.to(dtype=model.dtype)
         return og_forward(*args, **kwargs)
 
+    def controlnet_quant_forward(og_forward, model, *args, **kwargs):
+        """
+        Forward wrapper for quantized controlnets: casts floating point tensor inputs to
+        ``model.dtype``, as diffusers doesn't handle controlnet quantization state internally.
+        Integer / bool tensors (e.g. embedding indices) and non-tensor values pass through as is.
+        """
+
+        def cast(value):
+            if isinstance(value, torch.Tensor) and value.is_floating_point():
+                return value.to(dtype=model.dtype)
+            return value
+
+        return og_forward(*map(cast, args), **{k: cast(v) for k, v in kwargs.items()})
+
     def load_unet(uri: _uris.UNetUri, unet_class):
         unet_model = uri.load(
             variant_fallback=variant,
@@ -2736,13 +2762,41 @@ def load_default_text_encoder(encoder, encoder_name):
 
             parsed_controlnet_uris.append(parsed_controlnet_uri)
 
-            new_net = parsed_controlnet_uri.load(
+            # Apply global quantizer if controlnet doesn't have
+            # its own quantizer and should be quantized
+            controlnet_uri_to_load = parsed_controlnet_uri
+            if not parsed_controlnet_uri.quantizer and should_apply_quantizer('controlnet'):
+                # Create a new URI with the global quantizer
+                controlnet_uri_to_load = _uris.ControlNetUri(
+                    model=parsed_controlnet_uri.model,
+                    revision=parsed_controlnet_uri.revision,
+                    variant=parsed_controlnet_uri.variant,
+                    subfolder=parsed_controlnet_uri.subfolder,
+                    dtype=parsed_controlnet_uri.dtype,
+                    scale=parsed_controlnet_uri.scale,
+                    start=parsed_controlnet_uri.start,
+                    end=parsed_controlnet_uri.end,
+                    mode=parsed_controlnet_uri.mode,
+                    quantizer=quantizer_uri,
+                    model_type=parsed_controlnet_uri.model_type
+                )
+
+            new_net = controlnet_uri_to_load.load(
                 use_auth_token=auth_token,
                 dtype_fallback=dtype,
                 local_files_only=local_files_only,
-                no_cache=model_cpu_offload or sequential_cpu_offload
+                no_cache=model_cpu_offload or sequential_cpu_offload,
+                device_map=get_device_map_for_quantizer(controlnet_uri_to_load.quantizer)
             )
 
+            # Apply casting hack for quantized controlnets
+            if controlnet_uri_to_load.quantizer:
+                new_net.forward = functools.partial(
+                    controlnet_quant_forward,
+                    new_net.forward,
+                    new_net
+                )
+
             _messages.debug_log(lambda:
                                 f'Added Torch ControlNet: "{controlnet_uri}" '
                                 f'to pipeline: "{pipeline_class.__name__}"')
 
@@ -36,7 +36,7 @@
 from dgenerate.pipelinewrapper.uris import util as _util
 
 _controlnet_uri_parser = _textprocessing.ConceptUriParser(
-    'ControlNet', ['scale', 'start', 'end', 'mode', 'revision', 'variant', 'subfolder', 'dtype'])
+    'ControlNet', ['scale', 'start', 'end', 'mode', 'revision', 'variant', 'subfolder', 'dtype', 'quantizer'])
 
 _controlnet_cache = _d_memoize.create_object_cache(
     'controlnet', cache_type=_memory.SizedConstrainedObjectCache
@@ -172,6 +172,13 @@ def model_type(self) -> _enums.ModelType:
         """
         return self._model_type
 
+    @property
+    def quantizer(self) -> _types.OptionalUri:
+        """
+        ``--quantizer`` URI override given for this ControlNet, ``None`` when not specified.
+        """
+        return self._quantizer
+
     def __init__(self,
                  model: str,
                  revision: _types.OptionalString,
@@ -182,6 +189,7 @@ def __init__(self,
                  start: float = 0.0,
                  end: float = 1.0,
                  mode: int | str | FluxControlNetUnionUriModes | SDXLControlNetUnionUriModes | None = None,
+                 quantizer: _types.OptionalUri = None,
                  model_type: _enums.ModelType = _enums.ModelType.SD):
         """
         :param model: model path
@@ -193,15 +201,24 @@ def __init__(self,
         :param start: controlnet guidance start value
         :param end: controlnet guidance end value
         :param mode: Flux / SDXL Union controlnet mode.
+        :param quantizer: --quantizer URI override
         :param model_type: Model type this ControlNet will be attached to.
 
-        :raises InvalidControlNetUriError: If ``dtype`` is passed an invalid data type string.
+        :raises InvalidControlNetUriError: If ``dtype`` is passed an invalid data type string,
+            or if ``model`` points to a single file and ``quantizer`` is specified (not supported).
         """
 
+        if _hfhub.is_single_file_model_load(model):
+            if quantizer:
+                raise _exceptions.InvalidControlNetUriError(
+                    'specifying a ControlNet quantizer URI is only supported for Hugging Face '
+                    'repository loads from a repo slug or disk path, single file loads are not supported.')
+
         self._model = model
         self._revision = revision
         self._variant = variant
         self._subfolder = subfolder
+        self._quantizer = quantizer
         self._model_type = model_type
 
         if isinstance(mode, str):
@@ -232,6 +249,7 @@ def load(self,
              use_auth_token: _types.OptionalString = None,
              local_files_only: bool = False,
              no_cache: bool = False,
+             device_map: str | None = None,
              model_class:
              type[diffusers.ControlNetModel] |
              type[diffusers.ControlNetUnionModel] |
@@ -255,6 +273,8 @@ def load(self,
 
         :param no_cache: If True, force the returned object not to be cached by the memoize decorator.
 
+        :param device_map: device placement strategy for quantized models, defaults to ``None``
+
         :param model_class: What class of controlnet model should be loaded?
             if ``None`` is specified, load based off :py:attr:`ControlNetUri.model_type`
             and provided URI arguments.
@@ -283,6 +303,7 @@ def cache_all(e):
                               use_auth_token,
                               local_files_only,
                               no_cache,
+                              device_map,
                               model_class)
 
 
@@ -305,6 +326,7 @@ def _load(self,
               use_auth_token: _types.OptionalString = None,
               local_files_only: bool = False,
               no_cache: bool = False,
+              device_map: str | None = None,
               model_class:
               type[diffusers.ControlNetModel] |
               type[diffusers.ControlNetUnionModel] |
@@ -329,6 +351,14 @@ def _load(self,
         torch_dtype = _enums.get_torch_dtype(
             dtype_fallback if self.dtype is None else self.dtype)
 
+        if self.quantizer:
+            quant_config = _util.get_quantizer_uri_class(
+                self.quantizer,
+                _exceptions.InvalidControlNetUriError
+            ).parse(self.quantizer).to_config(torch_dtype)
+        else:
+            quant_config = None
+
         if single_file_load_path:
 
             estimated_memory_usage = _pipelinewrapper_util.estimate_model_memory_use(
@@ -366,7 +396,9 @@ def _load(self,
                 subfolder=self.subfolder,
                 torch_dtype=torch_dtype,
                 token=use_auth_token,
-                local_files_only=local_files_only)
+                local_files_only=local_files_only,
+                quantization_config=quant_config,
+                device_map=device_map)
 
         _messages.debug_log('Estimated Torch ControlNet Memory Use:',
                             _memory.bytes_best_human_unit(estimated_memory_usage))
@@ -376,7 +408,7 @@ def _load(self,
         # noinspection PyTypeChecker
         return new_net, _d_memoize.CachedObjectMetadata(
             size=estimated_memory_usage,
-            skip=no_cache
+            skip=self.quantizer or no_cache
         )
 
     @staticmethod
@@ -450,6 +482,7 @@ def parse(uri: _types.Uri,
                 start=start,
                 end=end,
                 mode=mode,
+                quantizer=r.args.get('quantizer', None),
                 model_type=model_type
             )
 
 
@@ -9677,7 +9677,8 @@ Quantization
 ============
 
 Quantization via ``bitsandbytes`` and ``sdnq`` is supported for certain
-diffusion submodels, for instance, the unet/transformer, and all text encoders.
+diffusion submodels, for instance, the unet/transformer, all text encoders,
+and controlnet models.
 
 It is also supported for certain plugins which utilize LLMs, such as the
 ``magicprompt`` upscaler, and ``llm4gen`` prompt weighter.
@@ -9700,7 +9701,8 @@ diffusion pipeline as it loads.
 
 You can control which sub modules of the diffusion pipeline get quantized
 by using the ``--quantizer-map`` argument, which accepts a list
-of ``diffusers`` module names, e.g.
+of ``diffusers`` module names, for example ``unet``, ``transformer``,
+``text_encoder``, ``text_encoder_2``, or ``controlnet``.
 
 .. code-block:: bash
 
@@ -9720,7 +9722,7 @@ of ``diffusers`` module names, e.g.
 
 
 Quantization URI can also be supplied via sub-model URIs, the arguments
-``--unet``, ``--transformer``, and ``--text-encoders`` all support a ``quantizer``
+``--unet``, ``--transformer``, ``--text-encoders``, and ``--control-nets`` all support a ``quantizer``
 sub URI argument for specifying the quantization backend for that particular sub-model.
 
 This allows you to set specific quantization settings for sub-models individually.
@@ -9743,6 +9745,65 @@ dgenerate as a URI argument seperator.
     --prompts "a cute cat"
 
 
+ControlNet Quantization
+-----------------------
+ControlNet models are **NOT** quantized by default when using the global ``--quantizer``
+argument. To quantize ControlNets, you must do one of the following:
+
+1. Add ``controlnet`` to the ``--quantizer-map`` list, so that the global ``--quantizer`` is applied to them.
+2. Specify quantization settings per ControlNet using the ``quantizer`` URI argument of ``--control-nets``.
+
+.. code-block:: bash
+
+    #!/usr/bin/env bash
+
+    # Method 1: Global quantization with controlnet in quantizer-map
+
+    dgenerate stabilityai/stable-diffusion-xl-base-1.0 \
+    --model-type sdxl \
+    --dtype float16 \
+    --variant fp16 \
+    --quantizer "bnb;bits=8" \
+    --quantizer-map unet text_encoder text_encoder_2 controlnet \
+    --control-nets "diffusers/controlnet-canny-sdxl-1.0" \
+    --inference-steps 30 \
+    --guidance-scales 5 \
+    --prompts "a cute cat"
+
+.. code-block:: bash
+
+    #!/usr/bin/env bash
+
+    # Method 2: Individual ControlNet quantization
+
+    dgenerate stabilityai/stable-diffusion-xl-base-1.0 \
+    --model-type sdxl \
+    --dtype float16 \
+    --variant fp16 \
+    --control-nets 'diffusers/controlnet-canny-sdxl-1.0;quantizer="bnb;bits=4"' \
+    --inference-steps 30 \
+    --guidance-scales 5 \
+    --prompts "a cute cat"
+
+.. code-block:: bash
+
+    #!/usr/bin/env bash
+
+    # ControlNet NOT quantized, only unet and text encoders
+
+    dgenerate stabilityai/stable-diffusion-xl-base-1.0 \
+    --model-type sdxl \
+    --dtype float16 \
+    --variant fp16 \
+    --quantizer "bnb;bits=8" \
+    --control-nets "diffusers/controlnet-canny-sdxl-1.0" \
+    --inference-steps 30 \
+    --guidance-scales 5 \
+    --prompts "a cute cat"
+
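+ControlNet quantization is only supported for Hugging Face repository loads and local directory paths.
+Single file ControlNet loads do not support quantization, whether it is requested with the ``quantizer`` URI argument or by adding ``controlnet`` to ``--quantizer-map``.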
+
 Quantizer usage documentation can be obtained with ``--quantizer-help`` or the
 equivalent ``\quantizer_help`` config directive, you can use this argument or
 directive to list quantization backend names, when you supply backend names as