Skip to content

Commit 54280dd

Browse files
authored
Merge branch 'main' into remote-utils
2 parents 4773420 + 37a5f1b commit 54280dd

39 files changed

+2218
-443
lines changed

.github/workflows/pr_style_bot.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ jobs:
5353
HEADREF: ${{ steps.pr_info.outputs.headRef }}
5454
PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
5555
run: |
56-
echo "PR number: ${{ env.PRNUMBER }}"
57-
echo "Head Ref: ${{ env.HEADREF }}"
58-
echo "Head Repo Full Name: ${{ env.HEADREPOFULLNAME }}"
56+
echo "PR number: $PRNUMBER"
57+
echo "Head Ref: $HEADREF"
58+
echo "Head Repo Full Name: $HEADREPOFULLNAME"
5959
6060
- name: Set up Python
6161
uses: actions/setup-python@v4
@@ -89,20 +89,20 @@ jobs:
8989
PRNUMBER: ${{ steps.pr_info.outputs.prNumber }}
9090
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
9191
run: |
92-
echo "HEADREPOFULLNAME: ${{ env.HEADREPOFULLNAME }}, HEADREF: ${{ env.HEADREF }}"
92+
echo "HEADREPOFULLNAME: $HEADREPOFULLNAME, HEADREF: $HEADREF"
9393
# Configure git with the Actions bot user
9494
git config user.name "github-actions[bot]"
9595
git config user.email "github-actions[bot]@users.noreply.github.com"
9696
9797
# Make sure your 'origin' remote is set to the contributor's fork
98-
git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${{ env.HEADREPOFULLNAME }}.git"
98+
git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/$HEADREPOFULLNAME.git"
9999
100100
# If there are changes after running style/quality, commit them
101101
if [ -n "$(git status --porcelain)" ]; then
102102
git add .
103103
git commit -m "Apply style fixes"
104104
# Push to the original contributor's forked branch
105-
git push origin HEAD:${{ env.HEADREF }}
105+
git push origin HEAD:$HEADREF
106106
echo "changes_pushed=true" >> $GITHUB_OUTPUT
107107
else
108108
echo "No changes to commit."

.github/workflows/pr_tests_gpu.yml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ on:
1111
- "src/diffusers/loaders/lora_base.py"
1212
- "src/diffusers/loaders/lora_pipeline.py"
1313
- "src/diffusers/loaders/peft.py"
14+
- "tests/pipelines/test_pipelines_common.py"
15+
- "tests/models/test_modeling_common.py"
1416
workflow_dispatch:
1517

1618
concurrency:
@@ -104,11 +106,18 @@ jobs:
104106
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
105107
CUBLAS_WORKSPACE_CONFIG: :16:8
106108
run: |
107-
pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
108-
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
109-
-s -v -k "not Flax and not Onnx and $pattern" \
110-
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
111-
tests/pipelines/${{ matrix.module }}
109+
if [ "${{ matrix.module }}" = "ip_adapters" ]; then
110+
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
111+
-s -v -k "not Flax and not Onnx" \
112+
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
113+
tests/pipelines/${{ matrix.module }}
114+
else
115+
pattern=$(cat ${{ steps.extract_tests.outputs.pattern_file }})
116+
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
117+
-s -v -k "not Flax and not Onnx and $pattern" \
118+
--make-reports=tests_pipeline_${{ matrix.module }}_cuda \
119+
tests/pipelines/${{ matrix.module }}
120+
fi
112121
113122
- name: Failure short reports
114123
if: ${{ failure() }}

docs/source/en/api/pipelines/marigold.md

Lines changed: 89 additions & 34 deletions
Large diffs are not rendered by default.

docs/source/en/api/pipelines/overview.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ The table below lists all the pipelines currently available in 🤗 Diffusers an
6565
| [Latte](latte) | text2image |
6666
| [LEDITS++](ledits_pp) | image editing |
6767
| [Lumina-T2X](lumina) | text2image |
68-
| [Marigold](marigold) | depth |
68+
| [Marigold](marigold) | depth-estimation, normals-estimation, intrinsic-decomposition |
6969
| [MultiDiffusion](panorama) | text2image |
7070
| [MusicLDM](musicldm) | text2audio |
7171
| [PAG](pag) | text2image |

docs/source/en/using-diffusers/marigold_usage.md

Lines changed: 312 additions & 173 deletions
Large diffs are not rendered by default.

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@
345345
"Lumina2Text2ImgPipeline",
346346
"LuminaText2ImgPipeline",
347347
"MarigoldDepthPipeline",
348+
"MarigoldIntrinsicsPipeline",
348349
"MarigoldNormalsPipeline",
349350
"MochiPipeline",
350351
"MusicLDMPipeline",
@@ -845,6 +846,7 @@
845846
Lumina2Text2ImgPipeline,
846847
LuminaText2ImgPipeline,
847848
MarigoldDepthPipeline,
849+
MarigoldIntrinsicsPipeline,
848850
MarigoldNormalsPipeline,
849851
MochiPipeline,
850852
MusicLDMPipeline,

src/diffusers/loaders/ip_adapter.py

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_state_dict
2424
from ..utils import (
2525
USE_PEFT_BACKEND,
26+
_get_detailed_type,
2627
_get_model_file,
28+
_is_valid_type,
2729
is_accelerate_available,
2830
is_torch_version,
2931
is_transformers_available,
@@ -577,29 +579,36 @@ def LinearStrengthModel(start, finish, size):
577579
pipeline.set_ip_adapter_scale(ip_strengths)
578580
```
579581
"""
580-
transformer = self.transformer
581-
if not isinstance(scale, list):
582-
scale = [[scale] * transformer.config.num_layers]
583-
elif isinstance(scale, list) and isinstance(scale[0], int) or isinstance(scale[0], float):
584-
if len(scale) != transformer.config.num_layers:
585-
raise ValueError(f"Expected list of {transformer.config.num_layers} scales, got {len(scale)}.")
582+
583+
scale_type = Union[int, float]
584+
num_ip_adapters = self.transformer.encoder_hid_proj.num_ip_adapters
585+
num_layers = self.transformer.config.num_layers
586+
587+
# Single value for all layers of all IP-Adapters
588+
if isinstance(scale, scale_type):
589+
scale = [scale for _ in range(num_ip_adapters)]
590+
# List of per-layer scales for a single IP-Adapter
591+
elif _is_valid_type(scale, List[scale_type]) and num_ip_adapters == 1:
586592
scale = [scale]
593+
# Invalid scale type
594+
elif not _is_valid_type(scale, List[Union[scale_type, List[scale_type]]]):
595+
raise TypeError(f"Unexpected type {_get_detailed_type(scale)} for scale.")
587596

588-
scale_configs = scale
597+
if len(scale) != num_ip_adapters:
598+
raise ValueError(f"Cannot assign {len(scale)} scales to {num_ip_adapters} IP-Adapters.")
589599

590-
key_id = 0
591-
for attn_name, attn_processor in transformer.attn_processors.items():
592-
if isinstance(attn_processor, (FluxIPAdapterJointAttnProcessor2_0)):
593-
if len(scale_configs) != len(attn_processor.scale):
594-
raise ValueError(
595-
f"Cannot assign {len(scale_configs)} scale_configs to "
596-
f"{len(attn_processor.scale)} IP-Adapter."
597-
)
598-
elif len(scale_configs) == 1:
599-
scale_configs = scale_configs * len(attn_processor.scale)
600-
for i, scale_config in enumerate(scale_configs):
601-
attn_processor.scale[i] = scale_config[key_id]
602-
key_id += 1
600+
if any(len(s) != num_layers for s in scale if isinstance(s, list)):
601+
invalid_scale_sizes = {len(s) for s in scale if isinstance(s, list)} - {num_layers}
602+
raise ValueError(
603+
f"Expected list of {num_layers} scales, got {', '.join(str(x) for x in invalid_scale_sizes)}."
604+
)
605+
606+
# Scalars are transformed to lists with length num_layers
607+
scale_configs = [[s] * num_layers if isinstance(s, scale_type) else s for s in scale]
608+
609+
# Set scales. zip over scale_configs prevents going into single transformer layers
610+
for attn_processor, *scale in zip(self.transformer.attn_processors.values(), *scale_configs):
611+
attn_processor.scale = scale
603612

604613
def unload_ip_adapter(self):
605614
"""

src/diffusers/models/attention_processor.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2780,9 +2780,8 @@ def __call__(
27802780

27812781
# IP-adapter
27822782
ip_query = hidden_states_query_proj
2783-
ip_attn_output = None
2784-
# for ip-adapter
2785-
# TODO: support for multiple adapters
2783+
ip_attn_output = torch.zeros_like(hidden_states)
2784+
27862785
for current_ip_hidden_states, scale, to_k_ip, to_v_ip in zip(
27872786
ip_hidden_states, self.scale, self.to_k_ip, self.to_v_ip
27882787
):
@@ -2793,12 +2792,14 @@ def __call__(
27932792
ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
27942793
# the output of sdp = (batch, num_heads, seq_len, head_dim)
27952794
# TODO: add support for attn.scale when we move to Torch 2.1
2796-
ip_attn_output = F.scaled_dot_product_attention(
2795+
current_ip_hidden_states = F.scaled_dot_product_attention(
27972796
ip_query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False
27982797
)
2799-
ip_attn_output = ip_attn_output.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
2800-
ip_attn_output = scale * ip_attn_output
2801-
ip_attn_output = ip_attn_output.to(ip_query.dtype)
2798+
current_ip_hidden_states = current_ip_hidden_states.transpose(1, 2).reshape(
2799+
batch_size, -1, attn.heads * head_dim
2800+
)
2801+
current_ip_hidden_states = current_ip_hidden_states.to(ip_query.dtype)
2802+
ip_attn_output += scale * current_ip_hidden_states
28022803

28032804
return hidden_states, encoder_hidden_states, ip_attn_output
28042805
else:

src/diffusers/models/controlnets/controlnet_union.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -605,12 +605,13 @@ def forward(
605605
controlnet_cond: List[torch.Tensor],
606606
control_type: torch.Tensor,
607607
control_type_idx: List[int],
608-
conditioning_scale: float = 1.0,
608+
conditioning_scale: Union[float, List[float]] = 1.0,
609609
class_labels: Optional[torch.Tensor] = None,
610610
timestep_cond: Optional[torch.Tensor] = None,
611611
attention_mask: Optional[torch.Tensor] = None,
612612
added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
613613
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
614+
from_multi: bool = False,
614615
guess_mode: bool = False,
615616
return_dict: bool = True,
616617
) -> Union[ControlNetOutput, Tuple[Tuple[torch.Tensor, ...], torch.Tensor]]:
@@ -647,6 +648,8 @@ def forward(
647648
Additional conditions for the Stable Diffusion XL UNet.
648649
cross_attention_kwargs (`dict[str]`, *optional*, defaults to `None`):
649650
A kwargs dictionary that if specified is passed along to the `AttnProcessor`.
651+
from_multi (`bool`, defaults to `False`):
652+
Use standard scaling when called from `MultiControlNetUnionModel`.
650653
guess_mode (`bool`, defaults to `False`):
651654
In this mode, the ControlNet encoder tries its best to recognize the input content of the input even if
652655
you remove all prompts. A `guidance_scale` between 3.0 and 5.0 is recommended.
@@ -658,6 +661,9 @@ def forward(
658661
If `return_dict` is `True`, a [`~models.controlnet.ControlNetOutput`] is returned, otherwise a tuple is
659662
returned where the first element is the sample tensor.
660663
"""
664+
if isinstance(conditioning_scale, float):
665+
conditioning_scale = [conditioning_scale] * len(controlnet_cond)
666+
661667
# check channel order
662668
channel_order = self.config.controlnet_conditioning_channel_order
663669

@@ -742,12 +748,16 @@ def forward(
742748
inputs = []
743749
condition_list = []
744750

745-
for cond, control_idx in zip(controlnet_cond, control_type_idx):
751+
for cond, control_idx, scale in zip(controlnet_cond, control_type_idx, conditioning_scale):
746752
condition = self.controlnet_cond_embedding(cond)
747753
feat_seq = torch.mean(condition, dim=(2, 3))
748754
feat_seq = feat_seq + self.task_embedding[control_idx]
749-
inputs.append(feat_seq.unsqueeze(1))
750-
condition_list.append(condition)
755+
if from_multi:
756+
inputs.append(feat_seq.unsqueeze(1))
757+
condition_list.append(condition)
758+
else:
759+
inputs.append(feat_seq.unsqueeze(1) * scale)
760+
condition_list.append(condition * scale)
751761

752762
condition = sample
753763
feat_seq = torch.mean(condition, dim=(2, 3))
@@ -759,10 +769,13 @@ def forward(
759769
x = layer(x)
760770

761771
controlnet_cond_fuser = sample * 0.0
762-
for idx, condition in enumerate(condition_list[:-1]):
772+
for (idx, condition), scale in zip(enumerate(condition_list[:-1]), conditioning_scale):
763773
alpha = self.spatial_ch_projs(x[:, idx])
764774
alpha = alpha.unsqueeze(-1).unsqueeze(-1)
765-
controlnet_cond_fuser += condition + alpha
775+
if from_multi:
776+
controlnet_cond_fuser += condition + alpha
777+
else:
778+
controlnet_cond_fuser += condition + alpha * scale
766779

767780
sample = sample + controlnet_cond_fuser
768781

@@ -806,12 +819,13 @@ def forward(
806819
# 6. scaling
807820
if guess_mode and not self.config.global_pool_conditions:
808821
scales = torch.logspace(-1, 0, len(down_block_res_samples) + 1, device=sample.device) # 0.1 to 1.0
809-
scales = scales * conditioning_scale
822+
if from_multi:
823+
scales = scales * conditioning_scale[0]
810824
down_block_res_samples = [sample * scale for sample, scale in zip(down_block_res_samples, scales)]
811825
mid_block_res_sample = mid_block_res_sample * scales[-1] # last one
812-
else:
813-
down_block_res_samples = [sample * conditioning_scale for sample in down_block_res_samples]
814-
mid_block_res_sample = mid_block_res_sample * conditioning_scale
826+
elif from_multi:
827+
down_block_res_samples = [sample * conditioning_scale[0] for sample in down_block_res_samples]
828+
mid_block_res_sample = mid_block_res_sample * conditioning_scale[0]
815829

816830
if self.config.global_pool_conditions:
817831
down_block_res_samples = [

src/diffusers/models/controlnets/multicontrolnet_union.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,12 @@ def forward(
4747
guess_mode: bool = False,
4848
return_dict: bool = True,
4949
) -> Union[ControlNetOutput, Tuple]:
50+
down_block_res_samples, mid_block_res_sample = None, None
5051
for i, (image, ctype, ctype_idx, scale, controlnet) in enumerate(
5152
zip(controlnet_cond, control_type, control_type_idx, conditioning_scale, self.nets)
5253
):
54+
if scale == 0.0:
55+
continue
5356
down_samples, mid_sample = controlnet(
5457
sample=sample,
5558
timestep=timestep,
@@ -63,12 +66,13 @@ def forward(
6366
attention_mask=attention_mask,
6467
added_cond_kwargs=added_cond_kwargs,
6568
cross_attention_kwargs=cross_attention_kwargs,
69+
from_multi=True,
6670
guess_mode=guess_mode,
6771
return_dict=return_dict,
6872
)
6973

7074
# merge samples
71-
if i == 0:
75+
if down_block_res_samples is None and mid_block_res_sample is None:
7276
down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
7377
else:
7478
down_block_res_samples = [

0 commit comments

Comments
 (0)